Make use of defaults.json (except C# for now)

2022-12-12 21:03:40 +01:00 · 2022-12-12 21:03:40 +01:00 · 8656fd134f
commit 8656fd134f
parent a8bda4704a
6 changed files with 102 additions and 68 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,4 @@
-*.json
+*_report_*.json

 ## Ignore Visual Studio temporary files, build results, and
 ## files generated by popular Visual Studio add-ons.
--- a/Crystal/index.cr
+++ b/Crystal/index.cr
@ -8,7 +8,7 @@ def main_loop(times, domains, protocols, log, min, max, second, report_file)
 		json.array do
 			i = 0
 			while i < times
-				url = url_generator(domains, protocols, min.as(UInt8), max.as(UInt8), second)
+				url = url_generator(domains, protocols, min, max, second)
 				puts "#{url} (#{i + 1}/#{times})" if log
 				client = HTTP::Client.new(URI.parse url)
 				client.connect_timeout = 40.seconds
@ -46,7 +46,7 @@ end

 def url_generator(domains, protocols, min, max, second)
 	result = String.build do |str|
-		str << protocols[Random.rand(protocols.size)] + "://"
+		str << "#{protocols[Random.rand(protocols.size)]}://"
 		url_length = Random.rand(min..max)
 		characters = "abcdefghijklmnopqrstuvwxyz0123456789"
 		i = 0
@ -54,19 +54,20 @@ def url_generator(domains, protocols, min, max, second)
 			str << characters[Random.rand(characters.size - 1)]	
 			i += 1
 		end
-		str << domains[Random.rand(domains.size)] if Random.rand(1..100) <= second
-		str << domains[Random.rand(domains.size)]
+		str << ".#{domains[Random.rand(domains.size)]}" if Random.rand(1..100) <= second
+		str << ".#{domains[Random.rand(domains.size)]}"
 	end
 	result
 end

-times = UInt32.new "3000"
-protocols = ["http"]
-domains = [".co", ".com", ".net", ".edu", ".gov", ".cn", ".org", ".cc", ".us", ".mil", ".ac", ".it", ".de"]
-second = UInt8.new "1"
-log = false
-min = UInt8.new "2"
-max = UInt8.new "50"
+defaults = JSON.parse(File.read("../defaults.json"))
+times = defaults["times"].as_i
+protocols = defaults["protocols"].as_a
+domains = defaults["domains"].as_a
+second = defaults["second"].as_i
+log = defaults["log"].as_bool
+min = defaults["min"].as_i
+max = defaults["max"].as_i

 OptionParser.parse do |parser|
 	parser.banner = "Website-Finder"
@ -74,19 +75,19 @@ OptionParser.parse do |parser|
 		puts parser
 		exit 
 	end
-	parser.on("-t TIMES", "--times=TIMES", "Number of requests / DEFAULT: #{times}") {|p_times| times = p_times.to_u32}
+	parser.on("-t TIMES", "--times=TIMES", "Number of requests / DEFAULT: #{times}") {|p_times| times = p_times.to_i}
 	parser.on("-d DOMAINS", "--domains=DOMAINS", "Domains used in URLS, like: .com,.net,.gov / DEFAULT: #{domains}") {|p_domains| domains = p_domains.split(",")}
 	parser.on("-p protocols", "--protocols=PROTOCOLS", "You may choose between: http | https | http,https / DEFAULT: #{protocols}") {|p_protocols| protocols = p_protocols.split(",")}
 	parser.on("-l", "--log", "Log all requests / DEFAULT: #{log}") {log = true}
-	parser.on("", "--min=LENGTH", "Minimum length of URLs / DEFAULT: #{min}") {|p_length| min = p_length.to_u8}
-	parser.on("", "--max=LENGTH", "Maximum length of URLs / DEFAULT: #{max}") {|p_length| max = p_length.to_u8}
-	parser.on("-s SECOND", "--second=SECOND", "Likelihood of a URL featuring a second-level domain / DEFAULT: #{second}") {|p_second| second = p_second.to_u8}
+	parser.on("", "--min=LENGTH", "Minimum length of URLs / DEFAULT: #{min}") {|p_length| min = p_length.to_i}
+	parser.on("", "--max=LENGTH", "Maximum length of URLs / DEFAULT: #{max}") {|p_length| max = p_length.to_i}
+	parser.on("-s SECOND", "--second=SECOND", "Likelihood of a URL featuring a second-level domain / DEFAULT: #{second}") {|p_second| second = p_second.to_i}
 end

 date = Time.local
 puts "\nI am going to look for websites through #{times} random URLs (min length #{min} and max length #{max} with the following domains: #{domains}"
 puts "These URLs will use the protocols #{protocols}"
-puts "and each of those URLs have #{second} in a 100 chance to have a second level domain."
+puts "and each of those URLs have #{second} in a 100 chance to have a second level domain"
 puts "Started at #{date.hour}h#{date.minute}m\n"

 report_file = "CR_report_#{date.day}#{date.hour}#{date.minute}.json"
--- a/Node.js/index.js
+++ b/Node.js/index.js
@ -5,16 +5,16 @@ const fs = require('fs')
 async function main_loop() {
 	json_object = []
 	for (let i = 0; i < times; i++) {
-		const url = await url_generator(i, times)
+		const url = url_generator(i, times)
 		try {
 			const response = await fetch(url)
 			console.log(`${url} exists!`)
-			json_object.push(`{"website_url":"${url}","response_type":"SUCCESS","response_code":"${String(response.statusCode)}","response_details":"${String(response.statusMessage)}"}`)
+			json_object.push(`{"website_url":"${url}","response_type":"SUCCESS","response_code":"${response.statusCode}","response_details":"${response.statusMessage}"}`)
 		}
 		catch(e) {
 			if (e.code != 'ENOTFOUND') {
 				console.log(`${url} exists!`)
-				json_object.push(`{"website_url":"${url}","response_type":"ERROR","response_code":"${String(e.code)}","response_details":"${String(e.syscall)}"}`)
+				json_object.push(`{"website_url":"${url}","response_type":"ERROR","response_code":"${e.code}","response_details":"${e.syscall}"}`)
 			}
 		}
 	}
@ -27,21 +27,18 @@ function url_generator(num_url, times) {
 	const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
 	const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
 	for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}
-	result += domains[Math.floor(Math.random() * domains.length)]
-	if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)]
+	result += `.${domains[Math.floor(Math.random() * domains.length)]}`
+	if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += `.${domains[Math.floor(Math.random() * domains.length)]}`
 	if (log) console.log(`${result} (${num_url + 1}/${times})`)
 	return result
 }

 function fetch(url, options = {}) {
 	return new Promise((resolve, reject) => {
-		if (!url) return reject(new Error('URL was not provided')) // Cannot happen; exists just for the sake of it
-
-		const {body, method = 'GET', ...restOptions} = options
 		const client = url.startsWith('https') ? https : http
-		const request = client.request(url, {method, ...restOptions}, (res) => {
+		const request = client.request(url, {method: "GET"}, (res) => {
 			res.setEncoding('utf8')
-			res.on('data', (chunk) => {}) // Do nothing, it must handle receiving data but we do not need the received data
+			res.on('data', () => {}) // Drain and discard the body: Node only emits 'end' once the response data has been consumed, and without this listener the program exits upon finding a website
 			res.on('end', () => {resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})})
 		})
 		request.on('error', (err) => {reject(err)})
@ -49,22 +46,23 @@ function fetch(url, options = {}) {
 	})
 }

-const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000
-const protocols = process.argv.indexOf('-p') > -1 ? process.argv[process.argv.indexOf('-p') + 1].split(',') : ['http']
-const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1
-const log = process.argv.indexOf('-l') > -1
-const mini = process.argv.indexOf('-min') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-min') + 1])) : 2
-const maxi = process.argv.indexOf('-max') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-max') + 1])) : 50
-
-const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json"
+const defaults = require("../defaults.json")
+const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : defaults.times
+const protocols = process.argv.indexOf('-p') > -1 ? process.argv[process.argv.indexOf('-p') + 1].split(',') : defaults.protocols
+const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : defaults.domains
+const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : defaults.second
+const log = process.argv.indexOf('-l') > -1 ? true : defaults.log
+const mini = process.argv.indexOf('-min') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-min') + 1])) : defaults.min
+const maxi = process.argv.indexOf('-max') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-max') + 1])) : defaults.max

+const date = new Date()
 process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `)
 console.log(domains)
 process.stdout.write("These URLs will use the protocols ")
 console.log(protocols)
-console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`)
-console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n')
+console.log(`and each of them have ${second} in a 100 chance to have a second level domain`)
+console.log("Started at %dh%dm\n", date.getHours(), date.getMinutes())

+const report_file = "JS_report_" + String(date.getUTCDate()) + String(date.getHours()) + String(date.getMinutes()) + ".json"
 fs.open(report_file, "w", function(err) {if (err) throw err})
 main_loop()
--- a/Python/index.py
+++ b/Python/index.py
@ -2,6 +2,7 @@ import sys
 import random
 import datetime
 import requests
+import json

 from requests.packages.urllib3.exceptions import InsecureRequestWarning
 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@ -16,13 +17,14 @@ def main_loop():
 			json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.status_code) + '","response_details":"' + str(response.reason) + '"}')
 		except Exception as e: # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good)
 			# Exception handling seems to be a pain because most errors return ConnectionError, so ConnectionError in itself can mean the website exists OR the website does NOT exist 
-			if "not known" not in str(e).lower() and "no address" not in str(e).lower():
+			err = str(e)
+			if "not known" not in err.lower() and "no address" not in err.lower():
 				print(url + " exists!")
-				err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
-				json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "").replace('"', "") + '"}')
+				err_code = err[err.index("[")+1 : err.index("]")] if "[" in err and "]" in err else "NO CODE FOUND"
+				json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + err.replace("\\", "").replace('"', "") + '"}')

-	f.write(str(json_object).replace("'", "").replace("\\", ""))
-	f.close()
+	report_file.write(str(json_object).replace("'", "").replace("\\", ""))
+	report_file.close()
 	print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")

 def url_generator(num_url, times):
@ -30,23 +32,25 @@ def url_generator(num_url, times):
 	characters = "abcdefghijklmnopqrstuvwxyz0123456789"
 	url_length = random.randint(min, max)
 	result += ''.join(random.choice(characters) for i in range(url_length))
-	result += domains[random.randint(0, len(domains) - 1)]
-	if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
+	result += f".{domains[random.randint(0, len(domains) - 1)]}"
+	if random.randint(1, 100) <= second: result += ".%s"%(domains[random.randint(0, len(domains) - 1)])
 	if log: print(result +  " (" + str(num_url + 1) + "/" + str(times) + ")")
 	return result

-times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
-protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http']
-domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
-log = '-l' in sys.argv
+defaults = json.load(open("../defaults.json", "rb"))
+times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else defaults["times"]
+protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else defaults["protocols"]
+domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else defaults["domains"]
+second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else defaults["second"]
+log = True if '-l' in sys.argv else defaults["log"]
 # NOTE: the two names below shadow Python's built-in min() and max() functions, which therefore cannot be called in this module
-min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2
-max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful!
+min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else defaults["min"]
+max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else defaults["max"] # Avoid values above ~50: Python cannot look for URLs much longer than that

+date = datetime.datetime.now()
 print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains))
-print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
-print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")
+print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain")
+print("Started at " + str(date.time())[0:5].replace(":", "h") + "m\n")

-f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
+report_file = open("PY_report_" + str(date.strftime("%d%H%M")) + ".json", "a+")
 main_loop()
--- a/Ruby/index.rb
+++ b/Ruby/index.rb
@ -25,23 +25,24 @@ def url_generator()
 	result = PROTOCOLS[rand(0...PROTOCOLS.length)] + '://'
 	url_length = rand(MIN..MAX)
 	result += rand(36 ** url_length).to_s(36)
-	result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
-	result += DOMAINS[rand(0...DOMAINS.length)]
+	result += "." + DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
+	result += "." + DOMAINS[rand(0...DOMAINS.length)]
 end

-TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000
-PROTOCOLS = ARGV.include?('-p') ? ARGV[ARGV.index('-p') + 1].split(",") : ['http']
-DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1
-LOG = ARGV.index('-l').class == Integer
-MIN = ARGV.include?('-min') ? ARGV[ARGV.index('-max') + 1].to_i : 2
-MAX = ARGV.include?('-min') ? ARGV[ARGV.index('-max') + 1].to_i : 50
-
-REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json"
+DEFAULTS = JSON.parse(File.read("../defaults.json"))
+TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : DEFAULTS["times"]
+PROTOCOLS = ARGV.include?('-p') ? ARGV[ARGV.index('-p') + 1].split(",") : DEFAULTS["protocols"]
+DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : DEFAULTS["domains"]
+SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : DEFAULTS["second"]
+LOG = ARGV.include?('-l') ? true : DEFAULTS["log"]
+MIN = ARGV.include?('-min') ? ARGV[ARGV.index('-min') + 1].to_i : DEFAULTS["min"]
+MAX = ARGV.include?('-max') ? ARGV[ARGV.index('-max') + 1].to_i : DEFAULTS["max"]

+DATE = Time.new
 puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MIN} and max length #{MAX}) with the following domains: #{DOMAINS}")
-puts("These URLs will use the protocols #{PROTOCOLS} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
-puts("Started at #{Time.new.hour}h#{Time.new.min}m\n")
+puts("These URLs will use the protocols #{PROTOCOLS} and each of those URLs have #{SECOND} in 100 chance to have a second level domain")
+puts("Started at #{DATE.hour}h#{DATE.min}m\n")

+REPORT_FILE = "RB_report_#{DATE.day}#{DATE.hour}#{DATE.min}.json"
 File.open(REPORT_FILE, 'a+')
 main_loop
--- a/defaults.json
+++ b/defaults.json
@ -0,0 +1,30 @@
+{
+	"times": 2000,
+	"protocols": [
+		"http"
+	],
+	"domains": [
+		"com",
+		"org",
+		"net",
+		"tk",
+		"cn",
+		"de",
+		"ru",
+		"uk",
+		"nl",
+		"ca",
+		"au",
+		"in",
+		"ir",
+		"cc",
+		"tv",
+		"live",
+		"gov",
+		"edu"
+	],
+	"second": 0,
+	"log": false,
+	"min": 2,
+	"max": 15
+}