Cleaned stuff & Added flexibility + second level domains
This commit is contained in:
parent
c509637865
commit
451cc10561
4 changed files with 52 additions and 52 deletions
|
@ -5,7 +5,7 @@ const fs = require('fs')
|
|||
async function main_loop() {
|
||||
json_object = []
|
||||
for (let i = 0; i < times; i++) {
|
||||
const url = await url_generator(domains, mode, log)
|
||||
const url = await url_generator()
|
||||
try {
|
||||
const response = await fetch(url)
|
||||
console.log(`${url} exists!`)
|
||||
|
@ -22,14 +22,13 @@ async function main_loop() {
|
|||
console.log('\nFinished at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm')
|
||||
}
|
||||
|
||||
function url_generator(domains, mode, log) {
|
||||
function url_generator() {
|
||||
let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://"
|
||||
const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
|
||||
const url_length = Math.floor(Math.random() * (30 - 2) + 2)
|
||||
for (let i = 0; i < url_length; i++) {
|
||||
result += characters.charAt(Math.floor(Math.random() * characters.length))
|
||||
}
|
||||
const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
|
||||
for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}
|
||||
result += domains[Math.floor(Math.random() * domains.length)]
|
||||
if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)]
|
||||
if (log) console.log(result)
|
||||
return result
|
||||
}
|
||||
|
@ -42,33 +41,29 @@ function fetch(url, options = {}) {
|
|||
const client = url.startsWith('https') ? https : http
|
||||
const request = client.request(url, {method, ...restOptions}, (res) => {
|
||||
res.setEncoding('utf8')
|
||||
let chunks = ''
|
||||
res.on('data', (chunk) => {
|
||||
chunks += chunk
|
||||
})
|
||||
res.on('end', () => {
|
||||
resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})
|
||||
})
|
||||
})
|
||||
request.on('error', (err) => {
|
||||
reject(err)
|
||||
res.on('data', (chunk) => {}) //Do nothing, it must handle receiving data but we do not need the received data
|
||||
res.on('end', () => {resolve({statusCode: res.statusCode, statusMessage: res.statusMessage})})
|
||||
})
|
||||
request.on('error', (err) => {reject(err)})
|
||||
request.end()
|
||||
})
|
||||
}
|
||||
|
||||
const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000
|
||||
if (isNaN(times)) return console.error("-t argument expected a number!")
|
||||
const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
|
||||
const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
|
||||
const mode = process.argv.indexOf('-m') > -1 ? process.argv[process.argv.indexOf('-m') + 1].split(',') : ['http']
|
||||
const log = process.argv.indexOf('-l') > -1
|
||||
const mini = process.argv.indexOf('-MIN') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MIN') + 1])) : 2
|
||||
const maxi = process.argv.indexOf('-MAX') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MAX') + 1])) : 50
|
||||
const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1
|
||||
|
||||
const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json"
|
||||
|
||||
process.stdout.write(`\nI am going to look for images through ${times} random URLs with the following domains: `)
|
||||
process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `)
|
||||
console.log(domains)
|
||||
process.stdout.write("These URLs will use the following protocols: ")
|
||||
process.stdout.write("These URLs will use the protocols ")
|
||||
console.log(mode)
|
||||
console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`)
|
||||
console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n')
|
||||
|
||||
fs.open(report_file, "w", function(err) {if (err) throw err})
|
||||
|
|
|
@ -6,7 +6,7 @@ import urllib.request
|
|||
def main_loop():
|
||||
json_object = []
|
||||
for i in range(times):
|
||||
url = url_generator(domains, log)
|
||||
url = url_generator()
|
||||
try:
|
||||
response = urllib.request.urlopen(url)
|
||||
print(url + " exists!")
|
||||
|
@ -14,35 +14,34 @@ def main_loop():
|
|||
except Exception as e:
|
||||
if "[Errno 11001]" in str(e): continue
|
||||
print(url + " exists!")
|
||||
err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NONE FOUND"
|
||||
err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
|
||||
json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}')
|
||||
|
||||
f.write(str(json_object).replace("'", ""))
|
||||
f.write(str(json_object).replace("'", "").replace("\\", ""))
|
||||
f.close()
|
||||
print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
|
||||
|
||||
def url_generator(domains, log):
|
||||
def url_generator():
|
||||
result = mode[random.randint(0, len(mode) - 1)] + "://"
|
||||
characters = "abcdefghijklmnopqrstuvwxyz0123456789"
|
||||
url_length = random.randint(2, 30)
|
||||
url_length = random.randint(mini, maxi)
|
||||
result += ''.join(random.choice(characters) for i in range(url_length))
|
||||
result += domains[random.randint(0, len(domains) - 1)]
|
||||
if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]
|
||||
if log: print(result)
|
||||
return result
|
||||
|
||||
times = sys.argv[sys.argv.index('-t') + 1] if '-t' in sys.argv else 3000
|
||||
try:
|
||||
times = int(times)
|
||||
except:
|
||||
print("-t argument expected a number!")
|
||||
sys.exit()
|
||||
domains = sys.argv[sys.argv.index('-d') + 1] if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
|
||||
times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
|
||||
domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
|
||||
mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http']
|
||||
log = '-l' in sys.argv
|
||||
mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2
|
||||
maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 #Python cannot look for URLs longer than 50ish, so be careful!
|
||||
second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
|
||||
|
||||
print("\nI am going to look for images through " + str(times) + " random URLs with the following domains: " + str(domains))
|
||||
print("These URLs use the following protocols: " + str(mode))
|
||||
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")
|
||||
print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
|
||||
print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
|
||||
print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")
|
||||
|
||||
f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
|
||||
main_loop()
|
||||
|
|
14
README.md
14
README.md
|
@ -29,18 +29,24 @@ No matter which script, if you wish to use arguments, you are required to use th
|
|||
- "-d" defines all the top-level domains the URLs will use, separated only by a ",".
|
||||
- "-m" defines the application protocol used. Multiple protocols can be defined by separating them with a ",".
|
||||
- "-l" is a flag that takes no value: if it is present, every generated URL is logged in the command-line.
|
||||
- "-s" defines how likely it will be that the URLs feature a second level domain, <=0 being impossible and >=100 being always.
|
||||
- "-MIN" defines the minimum length of the URLs.
|
||||
- "-MAX" defines the maximum length of the URLs.
|
||||
|
||||
* "-t" defaults to 3000.
|
||||
* "-d" defaults to a lot of popular top-level domains.
|
||||
* "-m" defaults to "http".
|
||||
* "-l" is off by default; passing it makes it so URLs will be logged.
|
||||
* "-s" defaults to 1.
|
||||
* "-MIN" defaults to 2.
|
||||
* "-MAX" defaults to 50.
|
||||
|
||||
```sh
|
||||
# To make the Python script go through 3000 URLs in HTTP with various top-level domains without logging:
|
||||
$ index.py
|
||||
|
||||
# To make the Ruby script go through 500 URLs in HTTP and HTTPS with only the .com and .fr top-level domains with logging:
|
||||
$ index.rb -t 500 -m http,https -l -d .com,.fr
|
||||
# To make the Ruby script go through 500 URLs of min length 5 and max length 7 in HTTP and HTTPS with only the .com and .fr top-level domains with a 30% chance for each URL to feature a second level domain with logging:
|
||||
$ index.rb -MAX 7 -t 500 -MIN 5 -m http,https -l -s 30 -d .com,.fr
|
||||
|
||||
# To make the Node.js script go through 3000 URLs in HTTPS with various top-level domains with logging:
|
||||
$ node index.js -m https -l
|
||||
|
@ -53,7 +59,3 @@ A: As far as I am aware, nope! However, the reports are generated differently de
|
|||
|
||||
Q: Why does the "-m" argument default to "http" rather than "https"?
|
||||
A: Requests in "http" receive more status codes than error codes compared to "https". I suspect it's because some websites don't support "https" very well, even in the current year.
|
||||
|
||||
## TO DO
|
||||
|
||||
Second-level domains
|
||||
|
|
|
@ -4,7 +4,7 @@ require 'json'
|
|||
def main_loop
|
||||
json_object = []
|
||||
TIMES.times do
|
||||
url = url_generator(DOMAINS, MODE)
|
||||
url = url_generator()
|
||||
puts(url) if LOG
|
||||
begin
|
||||
response = Net::HTTP.get_response(URI(url))
|
||||
|
@ -21,22 +21,26 @@ def main_loop
|
|||
puts("Finished at #{Time.new.hour}h#{Time.new.min}m\n")
|
||||
end
|
||||
|
||||
def url_generator(domains, mode)
|
||||
result = mode[rand(0..mode.length - 1)] + '://'
|
||||
url_length = rand(2..30)
|
||||
def url_generator()
|
||||
result = MODE[rand(0...MODE.length)] + '://'
|
||||
url_length = rand(MINI..MAXI)
|
||||
result += rand(36 ** url_length).to_s(36)
|
||||
result += domains[rand(0..domains.length - 1)]
|
||||
result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
|
||||
result += DOMAINS[rand(0...DOMAINS.length)]
|
||||
end
|
||||
|
||||
TIMES = ARGV.include?('-t') ? ARGV[ARGV.index("-t") + 1].to_i : 3000
|
||||
DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index("-d") + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc']
|
||||
MODE = ARGV.include?('-m') ? ARGV[ARGV.index("-m") + 1].split(",") : ['http']
|
||||
LOG = ARGV.index("-l").class == Integer
|
||||
TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000
|
||||
DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
|
||||
MODE = ARGV.include?('-m') ? ARGV[ARGV.index('-m') + 1].split(",") : ['http']
|
||||
LOG = ARGV.index('-l').class == Integer
|
||||
MINI = ARGV.include?('-MIN') ? ARGV[ARGV.index('-MIN') + 1].to_i : 2
|
||||
MAXI = ARGV.include?('-MAX') ? ARGV[ARGV.index('-MAX') + 1].to_i : 50
|
||||
SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1
|
||||
|
||||
REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json"
|
||||
|
||||
puts("\nI am going to look for images through #{TIMES} random URLs with the following domains: #{DOMAINS}")
|
||||
puts("These URLs will use the following protocols: #{MODE}")
|
||||
puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}")
|
||||
puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
|
||||
puts("Started at #{Time.new.hour}h#{Time.new.min}m\n")
|
||||
|
||||
File.open(REPORT_FILE, 'a+')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue