2020-07-15 19:28:36 +02:00
import datetime
import json
import random
import socket
import sys
import urllib.request
def main_loop():
    """Probe ``times`` random URLs and write the ones that answer to the report.

    Reads the module-level ``times`` (number of URLs to try) and ``f`` (the
    already opened report file) and calls ``url_generator`` for every attempt.
    A URL is recorded when ``urlopen`` succeeds (``SUCCESS``) or fails for any
    reason other than the host name not resolving (``ERROR``).  The collected
    records are written to ``f`` as one JSON array, and ``f`` is closed when
    the function finishes, whether or not the loop completed.
    """
    records = []
    try:
        for i in range(times):
            url = url_generator(i, times)
            try:
                # The context manager releases the connection right away
                # instead of leaving one socket open per hit.
                with urllib.request.urlopen(url) as response:
                    status = str(response.getcode())
                    server = str(response.info()["Server"])
            except Exception as e:  # deliberate best effort: classify below
                message = str(e)
                reason = getattr(e, "reason", None)
                # A failed DNS lookup means nothing lives at that name.  The
                # text "[Errno 11001]" is only produced on Windows; other
                # platforms report the same failure with different numbers,
                # so the exception type is checked as well.
                if (
                    isinstance(e, socket.gaierror)
                    or isinstance(reason, socket.gaierror)
                    or "[Errno 11001]" in message
                ):
                    continue
                # Any other failure means something answered for that name.
                print(url + " exists!")
                if "[" in message and "]" in message:
                    err_code = message[message.index("[") + 1:message.index("]")]
                else:
                    err_code = "NO CODE FOUND"
                records.append({
                    "website_url": url,
                    "response_type": "ERROR",
                    "response_code": err_code,
                    "response_details": message,
                })
            else:
                print(url + " exists!")
                records.append({
                    "website_url": url,
                    "response_type": "SUCCESS",
                    "response_code": status,
                    "response_details": "Server seems to be " + server,
                })

        # json.dumps escapes quotes and backslashes, so the report stays valid
        # JSON whatever the server header or error message contains.
        f.write(json.dumps(records))
    finally:
        f.close()
    print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
2021-03-07 16:30:48 +01:00
def url_generator(num_url, times):
    """Build one random URL from the module-level settings.

    The URL is a protocol picked from ``mode``, followed by ``://``, a random
    label of ``mini`` to ``maxi`` lowercase letters and digits, and a suffix
    picked from ``domains``.  With a ``second`` in 100 chance a second suffix
    from ``domains`` is appended.

    Args:
        num_url: Zero-based index of the current attempt; only used for the
            progress message.
        times: Total number of attempts; only used for the progress message.

    Returns:
        The generated URL as a string.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"

    scheme = random.choice(mode)
    label_length = random.randint(mini, maxi)
    label = "".join(random.choice(alphabet) for _ in range(label_length))
    url = scheme + "://" + label + random.choice(domains)

    # `second` is a percentage: 0 never adds a second suffix, 100 always does.
    if random.randint(1, 100) <= second:
        url += random.choice(domains)

    if log:
        print(url + " (" + str(num_url + 1) + "/" + str(times) + ")")
    return url
2020-07-24 01:37:47 +02:00
def _cli_option(flag, default, convert=str):
    """Return the converted value that follows *flag* in ``sys.argv``.

    Returns *default* unchanged when the flag is absent, and exits with a
    readable message when the flag is the last argument and has no value.
    """
    if flag not in sys.argv:
        return default
    position = sys.argv.index(flag) + 1
    if position >= len(sys.argv):
        sys.exit("Missing value after " + flag)
    return convert(sys.argv[position])


def _comma_list(value):
    """Split a comma-separated command-line value into a list."""
    return value.split(",")


# Settings read by main_loop() and url_generator() as module-level globals.
times = _cli_option('-t', 3000, int)
domains = _cli_option(
    '-d',
    ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'],
    _comma_list,
)
mode = _cli_option('-m', ['http'], _comma_list)
log = '-l' in sys.argv
mini = _cli_option('-MIN', 2, int)
# Python cannot look for URLs longer than 50ish, so be careful!
# NOTE(review): presumably related to the 63-character DNS label limit - confirm.
maxi = _cli_option('-MAX', 50, int)
second = _cli_option('-s', 1, int)

# random.randint(mini, maxi) would otherwise fail on the first URL, after the
# report file has already been created.
if mini > maxi:
    sys.exit("-MIN (" + str(mini) + ") cannot be greater than -MAX (" + str(maxi) + ")")

print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")

# The report is named after the current day, hour and minute.  main_loop()
# writes to and closes the global `f`; the `with` block additionally
# guarantees the file is closed if main_loop() is interrupted.
with open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+", encoding="utf-8") as f:
    main_loop()