Cleaned stuff & Added flexibility + second level domains

This commit is contained in:
isterix 2020-07-24 01:37:47 +02:00
parent c509637865
commit 451cc10561
4 changed files with 52 additions and 52 deletions

View file

@ -5,7 +5,7 @@ const fs = require('fs')
async function main_loop() {
json_object = []
for (let i = 0; i < times; i++) {
const url = await url_generator(domains, mode, log)
const url = await url_generator()
try {
const response = await fetch(url)
console.log(`${url} exists!`)
@ -22,14 +22,13 @@ async function main_loop() {
console.log('\nFinished at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm')
}
function url_generator(domains, mode, log) {
function url_generator() {
let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://"
const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
const url_length = Math.floor(Math.random() * (30 - 2) + 2)
for (let i = 0; i < url_length; i++) {
result += characters.charAt(Math.floor(Math.random() * characters.length))
}
const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}
result += domains[Math.floor(Math.random() * domains.length)]
if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)]
if (log) console.log(result)
return result
}
@ -42,33 +41,29 @@ function fetch(url, options = {}) {
const client = url.startsWith('https') ? https : http
const request = client.request(url, {method, ...restOptions}, (res) => {
res.setEncoding('utf8')
let chunks = ''
res.on('data', (chunk) => {
chunks += chunk
})
res.on('end', () => {
resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})
})
})
request.on('error', (err) => {
reject(err)
res.on('data', (chunk) => {}) //Do nothing, it must handle receiving data but we do not need the received data
res.on('end', () => {resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})})
})
request.on('error', (err) => {reject(err)})
request.end()
})
}
const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000
if (isNaN(times)) return console.error("-t argument expected a number!")
const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
const mode = process.argv.indexOf('-m') > -1 ? process.argv[process.argv.indexOf('-m') + 1].split(',') : ['http']
const log = process.argv.indexOf('-l') > -1
const mini = process.argv.indexOf('-MIN') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MIN') + 1])) : 2
const maxi = process.argv.indexOf('-MAX') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MAX') + 1])) : 50
const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1
const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json"
process.stdout.write(`\nI am going to look for images through ${times} random URLs with the following domains: `)
process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `)
console.log(domains)
process.stdout.write("These URLs will use the following protocols: ")
process.stdout.write("These URLs will use the protocols ")
console.log(mode)
console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`)
console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n')
fs.open(report_file, "w", function(err) {if (err) throw err})

View file

@ -6,7 +6,7 @@ import urllib.request
def main_loop():
json_object = []
for i in range(times):
url = url_generator(domains, log)
url = url_generator()
try:
response = urllib.request.urlopen(url)
print(url + " exists!")
@ -14,35 +14,34 @@ def main_loop():
except Exception as e:
if "[Errno 11001]" in str(e): continue
print(url + " exists!")
err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NONE FOUND"
err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}')
f.write(str(json_object).replace("'", ""))
f.write(str(json_object).replace("'", "").replace("\\", ""))
f.close()
print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
def url_generator(domains, log):
def url_generator():
result = mode[random.randint(0, len(mode) - 1)] + "://"
characters = "abcdefghijklmnopqrstuvwxyz0123456789"
url_length = random.randint(2, 30)
url_length = random.randint(mini, maxi)
result += ''.join(random.choice(characters) for i in range(url_length))
result += domains[random.randint(0, len(domains) - 1)]
if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
if log: print(result)
return result
times = sys.argv[sys.argv.index('-t') + 1] if '-t' in sys.argv else 3000
try:
times = int(times)
except:
print("-t argument expected a number!")
sys.exit()
domains = sys.argv[sys.argv.index('-d') + 1] if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http']
log = '-l' in sys.argv
mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2
maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 #Python cannot look for URLs longer than 50ish, so be careful!
second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
print("\nI am going to look for images through " + str(times) + " random URLs with the following domains: " + str(domains))
print("These URLs use the following protocols: " + str(mode))
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")
f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
main_loop()

View file

@ -29,18 +29,24 @@ No matter which script, if you wish to use arguments, you are required to use th
- "-d" defines all the top-level domains the URLs will use, separated only by a ",".
- "-m" defines the application protocol used. Multiple protocols can be defined by separating them with a ",".
- "-l" is a flag: when it is present, every generated URL is logged to the command line; when it is absent, URLs are not logged.
- "-s" defines the chance, out of 100, that a URL features a second-level domain; values <=0 mean never and values >=100 mean always.
- "-MIN" defines the minimum length of the URLs.
- "-MAX" defines the maximum length of the URLs.
* "-t" defaults to 3000.
* "-d" defaults to a lot of popular top-level domains.
* "-m" defaults to "http".
* "-l" is off by default; passing it makes it so URLs will be logged.
* "-s" defaults to 1.
* "-MIN" defaults to 2.
* "-MAX" defaults to 50.
```sh
# To make the Python script go through 3000 URLs in HTTP with various top-level domains without logging:
$ index.py
# To make the Ruby script go through 500 URLs in HTTP and HTTPS with only the .com and .fr top-level domains with logging:
$ index.rb -t 500 -m http,https -l -d .com,.fr
# To make the Ruby script go through 500 URLs of min length 5 and max length 7 in HTTP and HTTPS with only the .com and .fr top-level domains with a 30% chance for each URL to feature a second level domain with logging:
$ index.rb -MAX 7 -t 500 -MIN 5 -m http,https -l -s 30 -d .com,.fr
# To make the Node.js script go through 3000 URLs in HTTPS with various top-level domains with logging:
$ node index.js -m https -l
@ -53,7 +59,3 @@ A: As far as I am aware, nope! However, the reports are generated differently de
Q: Why does the "-m" argument default to "http" rather than "https"?
A: Requests in "http" receive more status codes than error codes compared to "https". I suspect it's because some websites don't support "https" very well, even in the current year.
## TO DO
Second-level domains

View file

@ -4,7 +4,7 @@ require 'json'
def main_loop
json_object = []
TIMES.times do
url = url_generator(DOMAINS, MODE)
url = url_generator()
puts(url) if LOG
begin
response = Net::HTTP.get_response(URI(url))
@ -21,22 +21,26 @@ def main_loop
puts("Finished at #{Time.new.hour}h#{Time.new.min}m\n")
end
def url_generator(domains, mode)
result = mode[rand(0..mode.length - 1)] + '://'
url_length = rand(2..30)
def url_generator()
result = MODE[rand(0...MODE.length)] + '://'
url_length = rand(MINI..MAXI)
result += rand(36 ** url_length).to_s(36)
result += domains[rand(0..domains.length - 1)]
result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
result += DOMAINS[rand(0...DOMAINS.length)]
end
TIMES = ARGV.include?('-t') ? ARGV[ARGV.index("-t") + 1].to_i : 3000
DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index("-d") + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
MODE = ARGV.include?('-m') ? ARGV[ARGV.index("-m") + 1].split(",") : ['http']
LOG = ARGV.index("-l").class == Integer
TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000
DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
MODE = ARGV.include?('-m') ? ARGV[ARGV.index('-m') + 1].split(",") : ['http']
LOG = ARGV.index('-l').class == Integer
MINI = ARGV.include?('-MIN') ? ARGV[ARGV.index('-MIN') + 1].to_i : 2
MAXI = ARGV.include?('-MAX') ? ARGV[ARGV.index('-MAX') + 1].to_i : 50
SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1
REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json"
puts("\nI am going to look for images through #{TIMES} random URLs with the following domains: #{DOMAINS}")
puts("These URLs will use the following protocols: #{MODE}")
puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}")
puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
puts("Started at #{Time.new.hour}h#{Time.new.min}m\n")
File.open(REPORT_FILE, 'a+')