Cleaned stuff & Added flexibility + second level domains

2020-07-24 01:37:47 +02:00 · 2020-07-24 01:37:47 +02:00 · 451cc10561
commit 451cc10561
parent c509637865
4 changed files with 52 additions and 52 deletions
--- a/Node.js/index.js
+++ b/Node.js/index.js
@ -5,7 +5,7 @@ const fs = require('fs')
 async function main_loop() {
 	json_object = []
 	for (let i = 0; i < times; i++) {
-		const url = await url_generator(domains, mode, log)
+		const url = await url_generator()
 		try {
 			const response = await fetch(url)
 			console.log(`${url} exists!`)
@ -22,14 +22,13 @@ async function main_loop() {
 	console.log('\nFinished at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm')
 }

-function url_generator(domains, mode, log) {
+function url_generator() {
 	let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://"
 	const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
-	const url_length = Math.floor(Math.random() * (30 - 2) + 2)
-	for (let i = 0; i < url_length; i++) {
-		result += characters.charAt(Math.floor(Math.random() * characters.length))
-	}
+	const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
+	for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}
 	result += domains[Math.floor(Math.random() * domains.length)]
+	if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)]
 	if (log) console.log(result)
 	return result
 }
@ -42,33 +41,29 @@ function fetch(url, options = {}) {
 		const client = url.startsWith('https') ? https : http
 		const request = client.request(url, {method, ...restOptions}, (res) => {
 			res.setEncoding('utf8')
-			let chunks = ''
-			res.on('data', (chunk) => {
-				chunks += chunk
-			})
-			res.on('end', () => {
-				resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})
-			})
-		})
-		request.on('error', (err) => {
-			reject(err)
+			res.on('data', (chunk) => {}) //Do nothing, it must handle receiving data but we do not need the received data
+			res.on('end', () => {resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})})
 		})
+		request.on('error', (err) => {reject(err)})
 		request.end()
 	})
 }

 const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000
-if (isNaN(times)) return console.error("-t argument expected a number!")
-const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
+const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
 const mode = process.argv.indexOf('-m') > -1 ? process.argv[process.argv.indexOf('-m') + 1].split(',') : ['http']
 const log = process.argv.indexOf('-l') > -1
+const mini = process.argv.indexOf('-MIN') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MIN') + 1])) : 2
+const maxi = process.argv.indexOf('-MAX') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MAX') + 1])) : 50
+const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1

 const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json"

-process.stdout.write(`\nI am going to look for images through ${times} random URLs with the following domains: `)
+process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `)
 console.log(domains)
-process.stdout.write("These URLs will use the following protocols: ")
+process.stdout.write("These URLs will use the protocols ")
 console.log(mode)
+console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`)
 console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n')

 fs.open(report_file, "w", function(err) {if (err) throw err})
--- a/Python/index.py
+++ b/Python/index.py
@ -6,7 +6,7 @@ import urllib.request
 def main_loop():
 	json_object = []
 	for i in range(times):
-		url = url_generator(domains, log)
+		url = url_generator()
 		try:
 			response = urllib.request.urlopen(url)
 			print(url + " exists!")
@ -14,35 +14,34 @@ def main_loop():
 		except Exception as e:
 			if "[Errno 11001]" in str(e): continue
 			print(url + " exists!")
-			err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NONE FOUND"
+			err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
 			json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}')

-	f.write(str(json_object).replace("'", ""))
+	f.write(str(json_object).replace("'", "").replace("\\", ""))
 	f.close()
 	print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")

-def url_generator(domains, log):
+def url_generator():
 	result = mode[random.randint(0, len(mode) - 1)] + "://"
 	characters = "abcdefghijklmnopqrstuvwxyz0123456789"
-	url_length = random.randint(2, 30)
+	url_length = random.randint(mini, maxi)
 	result += ''.join(random.choice(characters) for i in range(url_length))
 	result += domains[random.randint(0, len(domains) - 1)]
+	if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
 	if log: print(result)
 	return result

-times = sys.argv[sys.argv.index('-t') + 1] if '-t' in sys.argv else 3000
-try:
-	times = int(times)
-except:
-	print("-t argument expected a number!")
-	sys.exit()
-domains = sys.argv[sys.argv.index('-d') + 1] if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
+times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
+domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
 mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http']
 log = '-l' in sys.argv
+mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2
+maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 #Python cannot look for URLs longer than 50ish, so be careful!
+second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1

-print("\nI am going to look for images through " + str(times) + " random URLs with the following domains: " + str(domains))
-print("These URLs use the following protocols: " + str(mode))
-print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
+print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
+print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
+print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")

 f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
 main_loop()
--- a/README.md
+++ b/README.md
@ -29,18 +29,24 @@ No matter which script, if you wish to use arguments, you are required to use th
 - "-d" defines all the top-level domains the URLs will use, separated only by a ",".
 - "-m" defines the application protocol used. Multiple protocols can be defined by separating them with a ",".
 - "-l" defines by whether or not it is present whether or not all URLs will be logged in the command-line.
+- "-s" defines the chance, in percent, that a URL features a second-level domain: a value <= 0 means never and a value >= 100 means always.
+- "-MIN" defines the minimum length of the URLs.
+- "-MAX" defines the maximum length of the URLs.

 * "-t" defaults to 3000.
 * "-d" defaults to a lot of popular top-level domains.
 * "-m" defaults to "http".
 * "-l" makes it so URLs will be logged.
+* "-s" defaults to 1.
+* "-MIN" defaults to 2.
+* "-MAX" defaults to 50.

 ```sh
 # To make the Python script go through 3000 URLs in HTTP with various top-level domains without logging:
 $ index.py

-# To make the Ruby script go through 500 URLs in HTTP and HTTPS with only the .com and .fr top-level domains with logging:
-$ index.rb -t 500 -m http,https -l -d .com,.fr
+# To make the Ruby script go through 500 URLs of min length 5 and max length 7 in HTTP and HTTPS with only the .com and .fr top-level domains with a 30% chance for each URL to feature a second level domain with logging:
+$ index.rb -MAX 7 -t 500 -MIN 5 -m http,https -l -s 30 -d .com,.fr

 # To make the Node.js script go through 3000 URLs in HTTPS with various top-level domains with logging:
 $ node index.js -m https -l
@ -53,7 +59,3 @@ A: As far as I am aware, nope! However, the reports are generated differently de

 Q: Why does the "-m" argument defaults to "http" rather than "https"?  
 A: Requests in "http" receive more status codes than error codes compared to "https". I suspect it's because some websites don't support "https" very well, even in the current year.
-
-## TO DO
-
-Second-level domains
--- a/Ruby/index.rb
+++ b/Ruby/index.rb
@ -4,7 +4,7 @@ require 'json'
 def main_loop
 	json_object = []
 	TIMES.times do
-		url = url_generator(DOMAINS, MODE)
+		url = url_generator()
 		puts(url) if LOG
 		begin
 			response = Net::HTTP.get_response(URI(url))
@ -21,22 +21,26 @@ def main_loop
 	puts("Finished at #{Time.new.hour}h#{Time.new.min}m\n")
 end

-def url_generator(domains, mode)
-	result = mode[rand(0..mode.length - 1)] + '://'
-	url_length = rand(2..30)
+def url_generator()
+	result = MODE[rand(0...MODE.length)] + '://'
+	url_length = rand(MINI..MAXI)
 	result += rand(36 ** url_length).to_s(36)
-	result += domains[rand(0..domains.length - 1)]
+	result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
+	result += DOMAINS[rand(0...DOMAINS.length)]
 end

-TIMES = ARGV.include?('-t') ? ARGV[ARGV.index("-t") + 1].to_i : 3000
-DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index("-d") + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
-MODE = ARGV.include?('-m') ? ARGV[ARGV.index("-m") + 1].split(",") : ['http']
-LOG = ARGV.index("-l").class == Integer
+TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000
+DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
+MODE = ARGV.include?('-m') ? ARGV[ARGV.index('-m') + 1].split(",") : ['http']
+LOG = ARGV.index('-l').class == Integer
+MINI = ARGV.include?('-MIN') ? ARGV[ARGV.index('-MIN') + 1].to_i : 2
+MAXI = ARGV.include?('-MAX') ? ARGV[ARGV.index('-MAX') + 1].to_i : 50
+SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1

 REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json"

-puts("\nI am going to look for images through #{TIMES} random URLs with the following domains: #{DOMAINS}")
-puts("These URLs will use the following protocols: #{MODE}")
+puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}")
+puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
 puts("Started at #{Time.new.hour}h#{Time.new.min}m\n")

 File.open(REPORT_FILE, 'a+')