Cleaned stuff up

This commit is contained in:
Taevas 2022-12-11 20:00:36 +01:00
parent 0613fb7fc4
commit a8bda4704a
14 changed files with 215 additions and 160 deletions

21
Python/README.md Normal file
View file

@ -0,0 +1,21 @@
# Website-Finder: Python
This version is written for Python 3.8+; it has not been tested on other versions.
## REQUIREMENT
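[Python](https://www.python.org/) and the [requests](https://pypi.org/project/requests/) package (`pip install requests`), which `index.py` imports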
## HOW TO RUN
```sh
$ python index.py
```
```sh
$ python3 index.py
```
## OTHER STUFF
More details are available on [the readme in the root folder](../README.md)

View file

@ -1,30 +1,34 @@
import sys
import random
import datetime
import urllib.request
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
def main_loop():
    """Probe `times` random URLs and write the ones that seem to exist to the report.

    Uses the module-level settings `times` and `f` (the already-open report
    file) and gets every candidate URL from `url_generator`. A URL is reported
    when the request succeeds, or when it fails with anything other than a
    name-resolution error. The report is written as a single JSON array and
    the file is closed afterwards, even when the run is interrupted, so the
    results collected so far are not lost.
    """
    import json  # local import: keeps this function independent of the file's import block

    records = []
    try:
        for i in range(times):
            url = url_generator(i, times)
            try:
                # Certificate verification is disabled on purpose: an invalid
                # certificate still proves that a server answered.
                response = requests.get(url, verify=False, timeout=40)
            except Exception as e:
                # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good).
                # Most failures surface as ConnectionError, which can mean the website
                # exists OR does not exist, so the message text is the only way to
                # recognise a host name that did not resolve.
                message = str(e)
                lowered = message.lower()
                if "not known" in lowered or "no address" in lowered:
                    continue
                print(url + " exists!")
                # The error code, when there is one, is the text between the first "[" and the first "]".
                if "[" in message and "]" in message:
                    err_code = message[message.index("[") + 1:message.index("]")]
                else:
                    err_code = "NO CODE FOUND"
                records.append({
                    "website_url": url,
                    "response_type": "ERROR",
                    "response_code": err_code,
                    "response_details": message,
                })
            else:
                print(url + " exists!")
                records.append({
                    "website_url": url,
                    "response_type": "SUCCESS",
                    "response_code": str(response.status_code),
                    "response_details": str(response.reason),
                })
    finally:
        try:
            # json.dumps escapes quotes, backslashes and control characters, so the
            # report is always valid JSON; the separators keep the compact layout
            # the report has always had.
            f.write("[" + ", ".join(
                json.dumps(record, separators=(",", ":"), ensure_ascii=False)
                for record in records
            ) + "]")
        finally:
            f.close()
    print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
def url_generator(num_url, times):
result = mode[random.randint(0, len(mode) - 1)] + "://"
result = protocols[random.randint(0, len(protocols) - 1)] + "://"
characters = "abcdefghijklmnopqrstuvwxyz0123456789"
url_length = random.randint(mini, maxi)
url_length = random.randint(min, max)
result += ''.join(random.choice(characters) for i in range(url_length))
result += domains[random.randint(0, len(domains) - 1)]
if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
@ -32,15 +36,16 @@ def url_generator(num_url, times):
return result
def _option(flag, default, convert=str):
    """Return the converted value that follows `flag` in sys.argv, or `default` if the flag is absent."""
    if flag not in sys.argv:
        return default
    position = sys.argv.index(flag) + 1
    if position >= len(sys.argv):
        # A flag given as the very last argument has no value to read.
        raise SystemExit("Missing value after " + flag)
    return convert(sys.argv[position])


def _comma_list(value):
    """Split a comma-separated command-line value into a list of strings."""
    return value.split(",")


# Command-line settings; every flag is optional and falls back to the default shown here.
times = _option('-t', 3000, int)
protocols = _option('-p', ['http'], _comma_list)
domains = _option('-d', ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'], _comma_list)
second = _option('-s', 1, int)
log = '-l' in sys.argv
# NOTE: these two names shadow the built-in min() and max() for the whole module.
# They are kept because url_generator reads the length bounds under exactly these names.
min = _option('-min', 2, int)
max = _option('-max', 50, int)  # Python cannot look for URLs longer than 50ish, so be careful!

print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains))
print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")

# The report file is named after the current day, hour and minute; main_loop writes to it and closes it.
f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+", encoding="utf-8")