import sys
import random
import datetime
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


def main_loop():
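    """Request every generated URL and record reachable sites as SUCCESS or ERROR entries in the JSON report."""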
    json_object = []
    for i in range(times):
        url = url_generator(i, times)
        try:
            response = requests.get(url, verify=False, timeout=40)
            print(url + " exists!")
            json_object.append('{"website_url": "' + url + '", "response_type": "SUCCESS", "response_code": "' + str(response.status_code) + '", "response_details": "' + str(response.reason) + '"}')
        except Exception as e:  # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good)
            # Exception handling is a pain here: most failures surface as ConnectionError, so a ConnectionError by itself can mean the website exists OR does NOT exist
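            # "not known" / "no address" show up in DNS-resolution errors, i.e. the generated hostname does not resolve at all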
if " not known " not in str ( e ) . lower ( ) and " no address " not in str ( e ) . lower ( ) :
print ( url + " exists! " )
err_code = str ( e ) [ str ( e ) . index ( " [ " ) + 1 : str ( e ) . index ( " ] " ) ] if " [ " in str ( e ) and " ] " in str ( e ) else " NO CODE FOUND "
json_object . append ( ' { " website_url " : " ' + url + ' " , " response_type " : " ERROR " , " response_code " : " ' + err_code + ' " , " response_details " : " ' + str ( e ) . replace ( " \\ " , " " ) . replace ( ' " ' , " " ) + ' " } ' )
    f.write(str(json_object).replace("'", "").replace("\\", ""))
    f.close()
    print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")


def url_generator(num_url, times):
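    """Build one random URL: a random protocol, a random alphanumeric host of min..max characters, and one (sometimes two) domain suffixes."""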
    result = protocols[random.randint(0, len(protocols) - 1)] + "://"
    characters = "abcdefghijklmnopqrstuvwxyz0123456789"
    url_length = random.randint(min, max)
    result += ''.join(random.choice(characters) for i in range(url_length))
    result += domains[random.randint(0, len(domains) - 1)]
    if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
    if log: print(result + " (" + str(num_url + 1) + "/" + str(times) + ")")
    return result
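

# Command-line options (all optional): -t number of URLs, -p comma-separated protocols, -d comma-separated domain suffixes,
# -s percent chance of a second-level domain, -l verbose logging, -min/-max random hostname length.
# Example (script name is just a placeholder): python website_finder.py -t 500 -p http,https -d .com,.net -s 5 -l -min 3 -max 20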
times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http']
domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
log = '-l' in sys.argv
# Note: min and max deliberately shadow the built-in min() and max() functions (neither built-in is used in this script)
min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2
max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50  # Hostnames much longer than this stop resolving (a single DNS label is capped at 63 characters), so be careful!
print ( " \n I am going to look for websites through " + str ( times ) + " random URLs (min length " + str ( min ) + " and max length " + str ( max ) + " ) with the following domains: " + str ( domains ) )
print ( " These URLs will use the protocols " + str ( protocols ) + " and each of those URLs have " + str ( second ) + " in 100 chance to have a second level domain. " )
print ( " Started at " + str ( datetime . datetime . now ( ) . time ( ) ) [ 0 : 5 ] . replace ( " : " , " h " ) + " m \n " )
f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
main_loop()