Website-Finder/Python/index.py
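
# Usage sketch (every flag is optional; defaults live at the bottom of the file):
#   python index.py -t 3000 -p http,https -d .com,.net -s 1 -min 2 -max 50 -l
# -t: number of URLs to try, -p: protocols, -d: domains, -s: second-domain
# chance in percent, -l: log every generated URL, -min/-max: hostname length bounds.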

import sys
import json
import random
import datetime
import requests
import urllib3

# Certificate errors are expected when probing random hosts, so verification is
# disabled (verify=False below) and the resulting warning is silenced.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

def main_loop():
    results = []
    for i in range(times):
        url = url_generator(i, times)
        try:
            response = requests.get(url, verify=False, timeout=40)
            print(url + " exists!")
            results.append({
                "website_url": url,
                "response_type": "SUCCESS",
                "response_code": str(response.status_code),
                "response_details": str(response.reason),
            })
        except Exception as e:
            # The exception should always be ConnectionError (usually bad) or
            # ReadTimeout (good). ConnectionError on its own is ambiguous: it can
            # mean the website exists OR that it does not, so DNS-resolution
            # failures ("not known", "no address") are filtered out by message.
            msg = str(e)
            if "not known" not in msg.lower() and "no address" not in msg.lower():
                print(url + " exists!")
                err_code = msg[msg.index("[") + 1:msg.index("]")] if "[" in msg and "]" in msg else "NO CODE FOUND"
                results.append({
                    "website_url": url,
                    "response_type": "ERROR",
                    "response_code": err_code,
                    "response_details": msg,
                })
    f.write(json.dumps(results))
    f.close()
    print("Finished at " + datetime.datetime.now().strftime("%Hh%Mm"))
def url_generator(num_url, times):
    result = random.choice(protocols) + "://"
    characters = "abcdefghijklmnopqrstuvwxyz0123456789"
    url_length = random.randint(min_length, max_length)
    result += ''.join(random.choice(characters) for _ in range(url_length))
    result += random.choice(domains)
    # With a `second` in 100 chance, tack a second domain onto the first one.
    if random.randint(1, 100) <= second:
        result += random.choice(domains)
    if log:
        print(result + " (" + str(num_url + 1) + "/" + str(times) + ")")
    return result
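
# With the defaults this yields URLs such as "http://k3x.com" or, on the
# second-domain roll, "http://k3x.co.us" (made-up examples, assuming nothing
# about real registrations).
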
times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http']
domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
log = '-l' in sys.argv
# Bounds on the random hostname length (named so the min/max built-ins stay usable).
min_length = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2
max_length = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50  # DNS labels are capped at 63 characters, so URLs much longer than 50 tend to fail lookup; be careful!
print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains))
print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")
f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
main_loop()
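
# A minimal sanity check for a finished report (hypothetical file name); the
# report is one JSON array, so json.load() accepts the whole file:
#   import json
#   with open("PY_report_112000.json") as report:
#       entries = json.load(report)
#   print(len(entries), "responsive URLs recorded")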