Cleaned stuff up

2022-12-11 20:00:36 +01:00 · 2022-12-11 20:00:36 +01:00 · a8bda4704a
commit a8bda4704a
parent 0613fb7fc4
14 changed files with 215 additions and 160 deletions
--- a/Python/README.md
+++ b/Python/README.md
@ -0,0 +1,21 @@
+# Website-Finder: Python
+
+This targets Python 3.8+; it has not been tested with other versions.
+
+## REQUIREMENT
+
+[Python](https://www.python.org/) and the [requests](https://pypi.org/project/requests/) package (`pip install requests`)
+
+## HOW TO RUN
+
+```sh
+$ python index.py
+```
+
+```sh
+$ python3 index.py
+```
+
+## OTHER STUFF
+
+More details are available in [the README in the root folder](../README.md)
--- a/Python/index.py
+++ b/Python/index.py
@ -1,30 +1,34 @@
 import sys
 import random
 import datetime
-import urllib.request
+import requests
+
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

 def main_loop():
 	json_object = []
 	for i in range(times):
 		url = url_generator(i, times)
 		try:
-			response = urllib.request.urlopen(url)
+			response = requests.get(url, verify=False, timeout=40)
 			print(url + " exists!")
-			json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.getcode()) + '","response_details":"Server seems to be ' + str(response.info()["Server"]) + '"}')
-		except Exception as e:
-			if "[Errno 11001]" in str(e): continue
-			print(url + " exists!")
-			err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
-			json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}')
+			json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.status_code) + '","response_details":"' + str(response.reason) + '"}')
+		except Exception as e: # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good)
+			# ConnectionError is ambiguous: most failures are reported as ConnectionError, so on its own it can mean the website exists OR that it does NOT exist; the error message text is inspected below to tell the two cases apart
+			if "not known" not in str(e).lower() and "no address" not in str(e).lower():
+				print(url + " exists!")
+				err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
+				json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "").replace('"', "") + '"}')

 	f.write(str(json_object).replace("'", "").replace("\\", ""))
 	f.close()
 	print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")

 def url_generator(num_url, times):
-	result = mode[random.randint(0, len(mode) - 1)] + "://"
+	result = protocols[random.randint(0, len(protocols) - 1)] + "://"
 	characters = "abcdefghijklmnopqrstuvwxyz0123456789"
-	url_length = random.randint(mini, maxi)
+	url_length = random.randint(min, max)
 	result += ''.join(random.choice(characters) for i in range(url_length))
 	result += domains[random.randint(0, len(domains) - 1)]
 	if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
@ -32,15 +36,16 @@ def url_generator(num_url, times):
 	return result

 times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
+protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http']
 domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http']
-log = '-l' in sys.argv
-mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2
-maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful!
 second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
+log = '-l' in sys.argv
+# NOTE: the two names below shadow the built-in min() and max() functions for the rest of this module
+min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2
+max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful!

-print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
-print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
+print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains))
+print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
 print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")

 f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")