2020-07-15 19:28:36 +02:00
import sys
import random
import datetime
2022-12-11 20:00:36 +01:00
import requests
2022-12-12 21:03:40 +01:00
import json
2022-12-11 20:00:36 +01:00
from requests . packages . urllib3 . exceptions import InsecureRequestWarning
requests . packages . urllib3 . disable_warnings ( InsecureRequestWarning )
2020-07-15 19:28:36 +02:00
def main_loop():
    """Request every generated URL and write the responsive ones to the report.

    Reads the module-level ``times`` (number of URLs to try) and
    ``report_file`` (an open, writable text file), and calls
    ``url_generator`` once per attempt. Each URL that answers, or that
    fails with an error other than a name-resolution failure, becomes one
    record in a JSON array written to ``report_file``. The file is always
    closed before this function returns or propagates an exception.
    """
    records = []
    try:
        for i in range(times):
            url = url_generator(i, times)
            try:
                response = requests.get(url, verify=False, timeout=40)
            except Exception as e:
                # Exception should always be ConnectionError (**usually**
                # bad) or ReadTimeout (good). Most failures surface as
                # ConnectionError, so that type alone cannot tell whether
                # the website exists; the message text is inspected
                # instead. Kept broad on purpose: one bad URL must not
                # abort the whole run.
                err = str(e)
                lowered = err.lower()
                if "not known" in lowered or "no address" in lowered:
                    # Name resolution failed: treat the host as nonexistent.
                    continue
                print(url + " exists!")
                if "[" in err and "]" in err:
                    # The bracketed part of the message (e.g. an errno tag)
                    # is reported as the response code.
                    err_code = err[err.index("[") + 1:err.index("]")]
                else:
                    err_code = "NO CODE FOUND"
                records.append({
                    "website_url": url,
                    "response_type": "ERROR",
                    "response_code": err_code,
                    "response_details": err,
                })
            else:
                print(url + " exists!")
                records.append({
                    "website_url": url,
                    "response_type": "SUCCESS",
                    "response_code": str(response.status_code),
                    "response_details": str(response.reason),
                })

        # Serialize with the json module so quotes and backslashes in
        # server-supplied text are escaped instead of corrupting the report.
        json.dump(records, report_file)
    finally:
        report_file.close()

    print("Finished at " + datetime.datetime.now().strftime("%Hh%Mm"))
2021-03-07 16:30:48 +01:00
def url_generator(num_url, times):
    """Build one random URL from the module-level generation settings.

    The URL is ``<protocol>://<random name>.<domain>``, optionally followed
    by a second ``.<domain>`` suffix. It reads the module-level globals
    ``protocols``, ``domains``, ``second`` (percent chance of the extra
    suffix), ``log``, and ``min``/``max`` (bounds of the random name
    length; these globals shadow the builtins of the same name).

    Args:
        num_url: Zero-based index of the URL being generated; only used
            for the progress message.
        times: Total number of URLs in the run; only used for the
            progress message.

    Returns:
        The generated URL as a string.
    """
    characters = "abcdefghijklmnopqrstuvwxyz0123456789"

    protocol = random.choice(protocols)
    url_length = random.randint(min, max)
    name = "".join(random.choices(characters, k=url_length))
    result = f"{protocol}://{name}.{random.choice(domains)}"

    # `second` is a percentage: append a second-level domain that often.
    if random.randint(1, 100) <= second:
        result += f".{random.choice(domains)}"

    if log:
        print(f"{result} ({num_url + 1}/{times})")
    return result
2022-12-12 21:03:40 +01:00
def _cli_option(flag, default, convert=str):
    """Return the converted value that follows `flag` in sys.argv, or `default`.

    Exits with an explanatory message when `flag` is present but is the
    last argument, instead of failing with a bare IndexError.
    """
    if flag not in sys.argv:
        return default
    position = sys.argv.index(flag) + 1
    if position >= len(sys.argv):
        sys.exit(f"Missing value after {flag}")
    return convert(sys.argv[position])


def _comma_list(value):
    """Split a comma-separated command-line value into a list of strings."""
    return value.split(",")


# Fallback settings, used for every option not given on the command line.
with open("../defaults.json", "rb") as defaults_file:
    defaults = json.load(defaults_file)

times = _cli_option('-t', defaults["times"], int)
protocols = _cli_option('-p', defaults["protocols"], _comma_list)
domains = _cli_option('-d', defaults["domains"], _comma_list)
second = _cli_option('-s', defaults["second"], int)
log = '-l' in sys.argv or defaults["log"]
# NOTE: these two names shadow the builtins min() and max(). They are kept
# because url_generator reads them as module-level globals.
min = _cli_option('-min', defaults["min"], int)
max = _cli_option('-max', defaults["max"], int)  # Avoid >50

date = datetime.datetime.now()
print(f"\nI am going to look for websites through {times} random URLs (min length {min} and max length {max}) with the following domains: {domains}")
print(f"These URLs will use the protocols {protocols} and each of those URLs have {second} in 100 chance to have a second level domain")
print("Started at " + date.strftime("%Hh%Mm") + "\n")

# The report name encodes day, hour and minute of the start time.
report_file = open("PY_report_" + date.strftime("%d%H%M") + ".json", "a+")
try:
    main_loop()
finally:
    # Closing an already-closed file is a no-op, so this is safe even
    # when main_loop has closed the report itself.
    report_file.close()