Cleaned stuff up

parent 0613fb7fc4
commit a8bda4704a
14 changed files with 215 additions and 160 deletions

@@ -12,17 +12,17 @@ class WebRequests
     public static void Main(string[] args)
     {
         int times = Array.IndexOf(args, "-t") > -1 ? int.Parse(args[Array.IndexOf(args, "-t") + 1]) : 3000;
+        string[] protocols = Array.IndexOf(args, "-p") > -1 ? args[Array.IndexOf(args, "-p") + 1].Split(",") : new string[]{"http"};
         string[] domains = Array.IndexOf(args, "-d") > -1 ? args[Array.IndexOf(args, "-d") + 1].Split(",") : new string[]{".co", ".com", ".net", ".edu", ".gov", ".cn", ".org", ".cc", ".us", ".mil", ".ac", ".it", ".de"};
-        string[] mode = Array.IndexOf(args, "-m") > -1 ? args[Array.IndexOf(args, "-m") + 1].Split(",") : new string[]{"http"};
-        bool log = Array.IndexOf(args, "-l") > -1;
-        int mini = Array.IndexOf(args, "-MIN") > -1 ? int.Parse(args[Array.IndexOf(args, "-MIN") + 1]) : 2;
-        int maxi = Array.IndexOf(args, "-MAX") > -1 ? int.Parse(args[Array.IndexOf(args, "-MAX") + 1]) : 50;
         int second = Array.IndexOf(args, "-s") > -1 ? int.Parse(args[Array.IndexOf(args, "-s") + 1]) : 1;
+        bool log = Array.IndexOf(args, "-l") > -1;
+        int min = Array.IndexOf(args, "-MIN") > -1 ? int.Parse(args[Array.IndexOf(args, "-MIN") + 1]) : 2;
+        int max = Array.IndexOf(args, "-MAX") > -1 ? int.Parse(args[Array.IndexOf(args, "-MAX") + 1]) : 50;

         DateTime time = DateTime.Now;

-        Console.WriteLine($"\nI am going to look for websites through {times} random URLs (min length {mini} and max length {maxi}) with the following domains: {String.Join(", ", domains)}");
-        Console.WriteLine($"These URLs will use the protocols {String.Join(", ", mode)} and each of them have {second} in a 100 chance to have a second level domain.");
+        Console.WriteLine($"\nI am going to look for websites through {times} random URLs (min length {min} and max length {max}) with the following domains: {String.Join(", ", domains)}");
+        Console.WriteLine($"These URLs will use the protocols {String.Join(", ", protocols)} and each of those URLs have {second} in a 100 chance to have a second level domain.");
         Console.WriteLine($"Started at {time.Hour}h{time.Minute}m\n");

         List<data> _data = new List<data>();

@@ -30,7 +30,7 @@ class WebRequests
         for (int i = 0; i < times; i++)
         {

-            string url = RandomURL(domains, mode, mini, maxi, second);
+            string url = RandomURL(domains, protocols, min, max, second);
             if (log) Console.WriteLine($"{url} ({i+1}/{times})");

             try

@@ -85,12 +85,12 @@ class WebRequests
     }

     private static Random random = new Random();
-    public static string RandomURL(string[] d, string[] m, int mini, int maxi, int second)
+    public static string RandomURL(string[] d, string[] p, int min, int max, int second)
     {
         const string chars = "abcdefghijklmnopqrstuvwyxz0123456789";

-        string full_url = m[random.Next(m.Length)] + "://"; // Mode (http/https)
-        full_url += new string (Enumerable.Repeat(chars, random.Next(mini, maxi))
+        string full_url = p[random.Next(p.Length)] + "://"; // Protocol (http/https)
+        full_url += new string (Enumerable.Repeat(chars, random.Next(min, max))
             .Select(s => s[random.Next(s.Length)]).ToArray()); // Domain name (abc69)
         full_url += d[random.Next(d.Length)]; // Top-level domain (.fr)
         if (random.Next(100) <= second) full_url += d[random.Next(d.Length)]; // Second-level domain (.co)
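
The parsing code above renames -m to -p but keeps C#'s -MIN/-MAX flags uppercase. A purely illustrative invocation, assuming the unbuilt script is run with the .NET SDK as the root README describes (all values are made up; "dotnet run --" forwards everything after it to the program):

```sh
$ cd Website-Finder/C#
$ dotnet run -- -t 100 -p http,https -MIN 5 -MAX 20 -l
```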

Crystal/README.md (new file, 26 lines)
@@ -0,0 +1,26 @@
+# Website-Finder: Crystal
+
+## REQUIREMENT
+
+[Crystal](https://crystal-lang.org)
+
+## HOW TO RUN
+
+Please note that using arguments with the Crystal script is slightly different from using arguments with other scripts, due to how Crystal works
+
+To use arguments, you will need to add " -- " (without the quotation marks) between the name of the file and the first argument
+
+In the Crystal script, the "-min" argument has been replaced by the "--min" argument, same for "-max" being replaced by "--max"
+
+For both "--min" and "--max", you'll need to use an equal sign "=" instead of a space " " before the value
+
+```sh
+$ crystal run index.cr
+
+# To make the script go through 1000 URLs, each of maximum length 20:
+$ crystal run index.cr -- -t 1000 --max=20
+```
+
+## OTHER STUFF
+
+More details are available on [the readme in the root folder](../README.md)
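
Since the README's example only exercises --max, here is an equivalent sketch that sets both length flags (values are illustrative; note the "=" signs and the " -- " separator):

```sh
$ crystal run index.cr -- -t 500 --min=5 --max=20 -l
```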

@@ -3,12 +3,12 @@ require "http"
 require "uri"
 require "json"

-def main_loop(times, domains, mode, log, mini, maxi, second, report_file)
+def main_loop(times, domains, protocols, log, min, max, second, report_file)
   json_text = JSON.build do |json|
     json.array do
       i = 0
       while i < times
-        url = url_generator(domains, mode, mini.as(UInt8), maxi.as(UInt8), second)
+        url = url_generator(domains, protocols, min.as(UInt8), max.as(UInt8), second)
         puts "#{url} (#{i + 1}/#{times})" if log
         client = HTTP::Client.new(URI.parse url)
         client.connect_timeout = 40.seconds

@@ -18,7 +18,7 @@ def main_loop(times, domains, mode, log, mini, maxi, second, report_file)
           json.object do
             json.field "website_url", url
             json.field "response_type", "SUCCESS"
-            json.field "response_code", response.status_code
+            json.field "response_code", "#{response.status_code}"
             json.field "response_details", HTTP::Status.new(response.status_code)
           end
         rescue e : Socket::Addrinfo::Error

@@ -44,10 +44,10 @@ def main_loop(times, domains, mode, log, mini, maxi, second, report_file)
   puts "\nFinished at #{end_date.hour}h#{end_date.minute}m"
 end

-def url_generator(domains, mode, mini, maxi, second)
+def url_generator(domains, protocols, min, max, second)
   result = String.build do |str|
-    str << mode[Random.rand(mode.size)] + "://"
-    url_length = Random.rand(mini..maxi)
+    str << protocols[Random.rand(protocols.size)] + "://"
+    url_length = Random.rand(min..max)
     characters = "abcdefghijklmnopqrstuvwxyz0123456789"
     i = 0
     while i < url_length

@@ -57,17 +57,16 @@ def url_generator(domains, mode, mini, maxi, second)
     str << domains[Random.rand(domains.size)] if Random.rand(1..100) <= second
     str << domains[Random.rand(domains.size)]
   end
-  puts result
   result
 end

 times = UInt32.new "3000"
+protocols = ["http"]
 domains = [".co", ".com", ".net", ".edu", ".gov", ".cn", ".org", ".cc", ".us", ".mil", ".ac", ".it", ".de"]
-mode = ["http"]
-log = false
-mini = UInt8.new "3"
-maxi = UInt8.new "50"
 second = UInt8.new "1"
+log = false
+min = UInt8.new "2"
+max = UInt8.new "50"

 OptionParser.parse do |parser|
   parser.banner = "Website-Finder"

@@ -77,18 +76,18 @@ OptionParser.parse do |parser|
   end
   parser.on("-t TIMES", "--times=TIMES", "Number of requests / DEFAULT: #{times}") {|p_times| times = p_times.to_u32}
   parser.on("-d DOMAINS", "--domains=DOMAINS", "Domains used in URLS, like: .com,.net,.gov / DEFAULT: #{domains}") {|p_domains| domains = p_domains.split(",")}
-  parser.on("-m MODE", "--modes=MODES", "You may choose between: http | https | http,https / DEFAULT: #{mode}") {|p_modes| mode = p_modes.split(",")}
+  parser.on("-p PROTOCOLS", "--protocols=PROTOCOLS", "You may choose between: http | https | http,https / DEFAULT: #{protocols}") {|p_protocols| protocols = p_protocols.split(",")}
   parser.on("-l", "--log", "Log all requests / DEFAULT: #{log}") {log = true}
-  parser.on("", "--MIN=LENGTH", "Minimum length of URLs / DEFAULT: #{mini}") {|p_length| mini = p_length.to_u8}
-  parser.on("", "--MAX=LENGTH", "Maximum length of URLs / DEFAULT: #{maxi}") {|p_length| maxi = p_length.to_u8}
+  parser.on("", "--min=LENGTH", "Minimum length of URLs / DEFAULT: #{min}") {|p_length| min = p_length.to_u8}
+  parser.on("", "--max=LENGTH", "Maximum length of URLs / DEFAULT: #{max}") {|p_length| max = p_length.to_u8}
   parser.on("-s SECOND", "--second=SECOND", "Likelihood of a URL featuring a second-level domain / DEFAULT: #{second}") {|p_second| second = p_second.to_u8}
 end

 date = Time.local
-puts "\nI am going to look for websites through #{times} random URLs (min length #{mini} and max length #{maxi} with the following domains: #{domains}"
-puts "These URLs will use the protocols #{mode}"
-puts "and each of them have #{second} in a 100 chance to have a second level domain."
+puts "\nI am going to look for websites through #{times} random URLs (min length #{min} and max length #{max}) with the following domains: #{domains}"
+puts "These URLs will use the protocols #{protocols}"
+puts "and each of those URLs have #{second} in a 100 chance to have a second level domain."
 puts "Started at #{date.hour}h#{date.minute}m\n"

 report_file = "CR_report_#{date.day}#{date.hour}#{date.minute}.json"
-main_loop(times, domains, mode, log, mini, maxi, second, report_file)
+main_loop(times, domains, protocols, log, min, max, second, report_file)

Javascript/README.md (new file, 38 lines)
@@ -0,0 +1,38 @@
+# Website-Finder: Javascript
+
+While this is called Javascript, it also makes use of HTML (and CSS, somewhat)
+
+## HOW TO RUN
+
+It is already being run by [GitHub Pages](https://tttaevas.github.io/Website-Finder/Javascript/index.html), but you may run it yourself simply by opening [index.html](./index.html) in your web browser
+
+Do note that arguments and launching the search are handled through the HTML, so you cannot pass arguments through the cli/terminal, and opening the file will not instantly trigger the search
+
+```sh
+# You should be able to double-click the file or drag the file to the web browser, but if you wanna be fancy
+
+# Linux
+$ xdg-open index.html
+
+# Windows 10
+$ explorer index.html
+
+# macOS
+$ open index.html
+```
+
+## REGARDING REPORTS
+
+Your web browser should be unable to create files on your computer, so unlike the other scripts, no json report is made
+
+Instead, a report is created in real time within the HTML, while the logging is done by the browser in its development tools
+
+## REGARDING ARGUMENTS
+
+The GitHub Pages version cannot use HTTP due to [Mixed Content](https://developer.mozilla.org/en-US/docs/Web/Security/Mixed_content)
+
+The default arguments may not match the ones specified in [defaults.json](../defaults.json), as they have to be independent from each other
+
+## OTHER STUFF
+
+More details are available on [the readme in the root folder](../README.md)

@@ -12,18 +12,18 @@
 <form>
     <input type="button" id="btn" onclick="findWebsites()" value="Find websites!">
     <label>Number of URLs to check:</label>
-    <input type="number" id="times" value="3000">
-    <label>Domains to check (separated with commas):</label>
-    <input type="text" id="domains" value=".co, .com, .net, .edu, .gov, .cn, .org, .cc, .us, .mil, .ac, .it, .de">
-    <label>Chances out of 100 for a URL to have two domains:</label>
-    <input type="number" id="second" value="1">
+    <input type="number" id="times" value="2000">
     <label>Application protocols (separated with ", "):</label>
-    Please note that http requests (not https) will not work if the file is not run locally, <b>putting http in that field is useless if you're using GitHub Pages</b>
-    <input type="text" id="mode" value="https">
+    Please note that http requests (not https) will not work if the page is loaded on an HTTPS website, <b>putting http in that field is useless if you're using GitHub Pages</b>
+    <input type="text" id="protocols" value="https">
+    <label>Domains to check (separated with ", "):</label>
+    <input type="text" id="domains" value="com, org, net, tk, cn, de, ru, uk, nl, ca, au, in, ir, tv, live, gov, edu">
+    <label>Chances out of 100 for a URL to have two domains:</label>
+    <input type="number" id="second" value="0">
     <label>Minimum URL length (excluding domain and protocol):</label>
-    <input type="number" id="mini" value="2">
+    <input type="number" id="min" value="2">
     <label>Maximum URL length (excluding domain and protocol):</label>
-    <input type="number" id="maxi" value="50">
+    <input type="number" id="max" value="15">
 </form>
 <p>STATUS: STOPPED</p>
 <p>COUNT:</p>

@@ -3,7 +3,7 @@ function findWebsites() {
     async function main_loop() {
         for (let i = 0; i < times; i++) {
             count.innerHTML = `COUNT: ${i+1}/${times}`
-            const url = await url_generator()
+            const url = url_generator()
             url_show.innerHTML = `CHECKING: ${url}`

             try {

@@ -25,23 +25,23 @@ function findWebsites() {
     }

     function url_generator() {
-        let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://"
+        let result = protocols[Math.round(Math.random() * (protocols.length - 1))] + "://"
+        const url_length = Math.floor(Math.random() * (max - min) + min)
         const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
-        const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
         for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}
-        result += domains[Math.floor(Math.random() * domains.length)]
+        result += `.${domains[Math.floor(Math.random() * domains.length)]}`
         if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)]
         return result
     }

     const audio = new Audio("found.mp3")

-    const times = document.getElementById("times").value ? Math.round(Number(document.getElementById("times").value)) : 3000
-    const domains = document.getElementById("domains").value ? document.getElementById("domains").value.split(", ") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-    const second = document.getElementById("second").value ? Math.round(Number(document.getElementById("second").value)) : 1
-    const mode = document.getElementById("mode").value ? document.getElementById("mode").value.split(", ") : ['https']
-    const mini = document.getElementById("mini").value ? Math.round(Number(document.getElementById("mini").value)) : 2
-    const maxi = document.getElementById("maxi").value ? Math.round(Number(document.getElementById("maxi").value)) : 50
+    const times = document.getElementById("times").value ? Math.round(Number(document.getElementById("times").value)) : 2000
+    const protocols = document.getElementById("protocols").value ? document.getElementById("protocols").value.split(", ") : ['https']
+    const domains = document.getElementById("domains").value ? document.getElementById("domains").value.split(", ") : ["com", "org", "net", "tk", "cn", "de", "ru", "uk", "nl", "ca", "au", "in", "ir", "tv", "live", "gov", "edu"]
+    const second = document.getElementById("second").value ? Math.round(Number(document.getElementById("second").value)) : 0
+    const min = document.getElementById("min").value ? Math.round(Number(document.getElementById("min").value)) : 2
+    const max = document.getElementById("max").value ? Math.round(Number(document.getElementById("max").value)) : 15

     const list = document.getElementsByTagName("UL")[0]
     const status = document.getElementsByTagName("P")[0]

@@ -52,9 +52,9 @@ function findWebsites() {
     console.log('Number of URLs being checked:', times)
     console.log('Domains used in URLs:', domains)
     console.log('How many URLs out of 100 will feature two domains:', second)
-    console.log('Application protocols used by URLs:', mode)
-    console.log('Minimum length of URLs:', mini)
-    console.log('Maximum length of URLs:', maxi)
+    console.log('Application protocols used by URLs:', protocols)
+    console.log('Minimum length of URLs:', min)
+    console.log('Maximum length of URLs:', max)


     status.innerHTML = "STATUS: ACTIVE"

Node.js/README.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+# Website-Finder: Node.js
+
+## REQUIREMENT
+
+[Node.js](https://nodejs.org)
+
+## HOW TO RUN
+
+```sh
+$ node index.js
+```
+
+## OTHER STUFF
+
+More details are available on [the readme in the root folder](../README.md)

@@ -23,7 +23,7 @@ async function main_loop() {
 }

 function url_generator(num_url, times) {
-    let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://"
+    let result = protocols[Math.round(Math.random() * (protocols.length - 1))] + "://"
     const characters = "abcdefghijklmnopqrstuvwxyz0123456789"
     const url_length = Math.floor(Math.random() * (maxi - mini) + mini)
     for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))}

@@ -50,19 +50,19 @@ function fetch(url, options = {}) {
 }

 const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000
+const protocols = process.argv.indexOf('-p') > -1 ? process.argv[process.argv.indexOf('-p') + 1].split(',') : ['http']
 const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-const mode = process.argv.indexOf('-m') > -1 ? process.argv[process.argv.indexOf('-m') + 1].split(',') : ['http']
-const log = process.argv.indexOf('-l') > -1
-const mini = process.argv.indexOf('-MIN') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MIN') + 1])) : 2
-const maxi = process.argv.indexOf('-MAX') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MAX') + 1])) : 50
 const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1
+const log = process.argv.indexOf('-l') > -1
+const mini = process.argv.indexOf('-min') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-min') + 1])) : 2
+const maxi = process.argv.indexOf('-max') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-max') + 1])) : 50

 const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json"

 process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `)
 console.log(domains)
 process.stdout.write("These URLs will use the protocols ")
-console.log(mode)
+console.log(protocols)
 console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`)
 console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n')
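
The Node.js script keeps its mini/maxi variable names but now reads lowercase -min/-max flags, and -p instead of -m. An illustrative run based on the parsing code above (all values are examples):

```sh
$ node index.js -t 500 -p http,https -min 5 -max 20 -l
```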

Python/README.md (new file, 21 lines)
@@ -0,0 +1,21 @@
+# Website-Finder: Python
+
+This is specifically for Python 3.8+; it has not been tested on other versions
+
+## REQUIREMENT
+
+[Python](https://www.python.org/)
+
+## HOW TO RUN
+
+```sh
+$ python index.py
+```
+
+```sh
+$ python3 index.py
+```
+
+## OTHER STUFF
+
+More details are available on [the readme in the root folder](../README.md)

@@ -1,30 +1,34 @@
 import sys
 import random
 import datetime
-import urllib.request
+import requests
+
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

 def main_loop():
     json_object = []
     for i in range(times):
         url = url_generator(i, times)
         try:
-            response = urllib.request.urlopen(url)
+            response = requests.get(url, verify=False, timeout=40)
             print(url + " exists!")
-            json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.getcode()) + '","response_details":"Server seems to be ' + str(response.info()["Server"]) + '"}')
-        except Exception as e:
-            if "[Errno 11001]" in str(e): continue
+            json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.status_code) + '","response_details":"' + str(response.reason) + '"}')
+        except Exception as e: # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good)
+            # Exception handling seems to be a pain because most errors return ConnectionError, so ConnectionError in itself can mean the website exists OR the website does NOT exist
+            if "not known" not in str(e).lower() and "no address" not in str(e).lower():
+                print(url + " exists!")
             err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND"
-            json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}')
+            json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "").replace('"', "") + '"}')

     f.write(str(json_object).replace("'", "").replace("\\", ""))
     f.close()
     print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m")

 def url_generator(num_url, times):
-    result = mode[random.randint(0, len(mode) - 1)] + "://"
+    result = protocols[random.randint(0, len(protocols) - 1)] + "://"
     characters = "abcdefghijklmnopqrstuvwxyz0123456789"
-    url_length = random.randint(mini, maxi)
+    url_length = random.randint(min, max)
     result += ''.join(random.choice(characters) for i in range(url_length))
     result += domains[random.randint(0, len(domains) - 1)]
     if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)]

@@ -32,15 +36,16 @@ def url_generator(num_url, times):
     return result

 times = int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000
+protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http']
 domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http']
-log = '-l' in sys.argv
-mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2
-maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful!
 second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1
+log = '-l' in sys.argv
+# lmao what if we literally get rid of two built-in functions
+min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2
+max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful!

-print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains))
-print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
+print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains))
+print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.")
 print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n")

 f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+")
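
As the commit's own comment notes, the new min/max names shadow Python's built-in functions; the flags themselves work like the other scripts'. An illustrative invocation (all values are examples):

```sh
$ python3 index.py -t 500 -p https -d .com,.fr -min 5 -max 20 -l
```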

README.md (90 lines changed)
@@ -1,89 +1,25 @@
 # Website-Finder

-Website-Finder is a collection of light scripts written in various programming languages without the need for external libraries that finds websites of all sorts for you and make reports of that either in the form of automatically generated json files or in the form of a webpage.
-
-Keep in mind that this software will find ANY website that exists, no matter how morally wrong it may be. It may also (on purpose) find websites which are hosted by a server that simply doesn't reply to requests.
-
-## REQUIREMENTS
-
-Each script has its own requirements.
-
-* index.py, the Python script, requires [Python 3](https://www.python.org/downloads/)
-* index.js, the Node.js script, requires [Node.js](https://nodejs.org/en/download/)
-* index.rb, the Ruby script, requires [Ruby](https://rubyinstaller.org/downloads/)
-* index.html, which runs a Javascript script within a HTML webpage, only requires a web browser supporting [JS](https://developer.mozilla.org/en-US/docs/Learn/JavaScript/First_steps/What_is_JavaScript)
-* Program.cs, the C# script, requires [.NET SDK](https://dotnet.microsoft.com/download) **to be built**
-
-An already built C# script for Windows (x64) that doesn't have any requirement is available in this repository's releases. (C#.exe in win-x64)
-
-## HOW TO RUN
-
-You can run the Python, the (built) C# script or the Ruby script by simply double clicking on it or going into the command-line, moving into the right directory and entering the file name.
-
-To run the Node.js script or the (unbuilt) C# script, you will have to use the command-line.
-
-```sh
-$ cd Website-Finder/Node.js
-$ node index.js
-```
-```sh
-$ cd Website-Finder/C#
-$ dotnet run
-```
-
-For the Javascript script, you can:
-
-* Run the HTML file into your web browser, by either double-clicking it or by dragging the file into it
-* Visit that same file hosted on [GitHub Pages](https://tttaevas.github.io/Website-Finder/Javascript/index.html)
-
-Please keep in mind that **any website that this software finds can contain dangerous resources, therefore please proceed with caution, [update your web browser](https://www.whatismybrowser.com/), use an [adblocker](https://ublockorigin.com/) and [check the URL](https://www.virustotal.com/gui/home/url) for anything dangerous.** I am not responsible for anything my software finds for you.
+Website-Finder is a collection of light scripts written in various programming languages, without the need for external libraries, that find websites of all sorts for you and automatically make reports of that, usually in the form of a json file

 ## ARGUMENTS

-You can use arguments by launching the scripts through the command-line.
+You can use arguments when launching scripts through the cli/terminal

-- "-t" defines the number of URLs the script will go through.
-- "-d" defines all the top-level domains the URLs will use, separated only by a ",".
-- "-m" defines the application protocol used. Multiple protocols can be defined by separating them with a ",".
-- "-l" defines by whether or not it is present whether or not all URLs will be logged in the command-line.
-- "-s" defines how likely it will be that the URLs feature a second level domain, <=0 being impossible and >=100 being always.
-- "-MIN" defines the minimum length of the URLs.
-- "-MAX" defines the maximul length of the URLs.
+- "-t" (times) defines the number of URLs the script will go through
+- "-p" (protocols) defines the application protocol used, multiple protocols can be defined by separating them with a ","
+- "-d" (domains) defines all the top-level and second-level domains the URLs will use, separated only by a ","
+- "-s" (second) defines how likely it will be that the URLs feature a second-level domain, 0 being impossible and 100 being always
+- "-l" (log) will make all URLs be logged in the cli/terminal if it's present
+- "-min" (minimum) defines the minimum length of the URLs
+- "-max" (maximum) defines the maximum length of the URLs

-* "-t" defaults to 3000.
-* "-d" defaults to a lot of popular top-level domains.
-* "-m" defaults to "http", but the Javascript script defaults to "https" due to [requests made with the "http" application protocol being blocked when not run locally](https://developer.mozilla.org/en-US/docs/Web/Security/Mixed_content).
-* "-l" is off by default.
-* "-s" defaults to 1.
-* "-MIN" defaults to 2.
-* "-MAX" defaults to 50.
-
-Using arguments with the Javascript script is simple, as you can enter values in labeled fields. Leaving those fields empty will make the script use the default values.
-
-```sh
-# To make the Python script go through 3000 URLs in HTTP with various top-level domains without logging:
-$ index.py
-
-# To make the Ruby script go through 500 URLs of min length 5 and max length 7 in HTTP and HTTPS with only the .com and .fr top-level domains with a 30% chance for each URL to feature a second level domain with logging:
-$ index.rb -MAX 7 -t 500 -MIN 5 -m http,https -l -s 30 -d .com,.fr
-
-# To make the Node.js script go through 3000 URLs in HTTPS with various top-level domains with logging:
-$ node index.js -m https -l
-
-# To make the (built) C# script go through 100 URLs with the .com top-level domains with a 0% chance for each URL to feature a second level domain without logging:
-$ C#.exe -d .com -s 0
-```
+Default values can be found in [defaults.json](./defaults.json)

 ## REPORTS

-Once a script is done running, it will generate a .json report in its folder.
+Once a script is done running, it will fill a .json report in its directory

-The Javascript script generates the report in real-time on the webpage, in the black box on the right side of the screen.
+## OTHER STUFF

-## FAQ
-
-Q: Is there a script that is better than the other?
-A: As far as I am aware, nope! However, the reports are generated differently depending of the script and some websites send different codes depending of the script.
-
-Q: Why does the "-m" argument default to "http" rather than "https"?
-A: Requests in "http" receive more status codes than error codes compared to "https". I suspect it's because some websites don't support "https" very well, even in the current year.
+For information exclusive to a script, read the README.md file in its directory
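
A purely illustrative sketch of the flags listed in the new ARGUMENTS section (all values are made up; script names come from the repository's per-language READMEs, and C# keeps uppercase -MIN/-MAX):

```sh
# 500 URLs of length 5-7 over http and https, only the .com and .fr domains,
# a 30% chance of a second-level domain, with logging:
$ ruby index.rb -t 500 -min 5 -max 7 -p http,https -s 30 -d .com,.fr -l

# 100 URLs with only the .com domain and no second-level domains:
$ C#.exe -t 100 -d .com -s 0
```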

Ruby/README.md (new file, 15 lines)
@@ -0,0 +1,15 @@
+# Website-Finder: Ruby
+
+## REQUIREMENT
+
+Ruby ([Windows Installer](https://rubyinstaller.org/downloads)) ([Snap Store for Linux](https://snapcraft.io/ruby))
+
+## HOW TO RUN
+
+```sh
+$ ruby index.rb
+```
+
+## OTHER STUFF
+
+More details are available on [the readme in the root folder](../README.md)

@@ -22,25 +22,25 @@ def main_loop
 end

 def url_generator()
-  result = MODE[rand(0...MODE.length)] + '://'
-  url_length = rand(MINI..MAXI)
+  result = PROTOCOLS[rand(0...PROTOCOLS.length)] + '://'
+  url_length = rand(MIN..MAX)
   result += rand(36 ** url_length).to_s(36)
   result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
   result += DOMAINS[rand(0...DOMAINS.length)]
 end

 TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000
+PROTOCOLS = ARGV.include?('-p') ? ARGV[ARGV.index('-p') + 1].split(",") : ['http']
 DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de']
-MODE = ARGV.include?('-m') ? ARGV[ARGV.index('-m') + 1].split(",") : ['http']
-LOG = ARGV.index('-l').class == Integer
-MINI = ARGV.include?('-MIN') ? ARGV[ARGV.index('-MIN') + 1].to_i : 2
-MAXI = ARGV.include?('-MAX') ? ARGV[ARGV.index('-MAX') + 1].to_i : 50
 SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1
+LOG = ARGV.index('-l').class == Integer
+MIN = ARGV.include?('-min') ? ARGV[ARGV.index('-min') + 1].to_i : 2
+MAX = ARGV.include?('-max') ? ARGV[ARGV.index('-max') + 1].to_i : 50

 REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json"

-puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}")
-puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
+puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MIN} and max length #{MAX}) with the following domains: #{DOMAINS}")
+puts("These URLs will use the protocols #{PROTOCOLS} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
 puts("Started at #{Time.new.hour}h#{Time.new.min}m\n")

 File.open(REPORT_FILE, 'a+')

@@ -1,5 +1,5 @@
-<meta http-equiv="refresh" content="0; url=https://tttaevas.github.io/Website-Finder/Javascript/index.html">
-<!-- If you wish to host the Javascript part on GitHub Pages, this file is necessary to redirect to folder
+<meta http-equiv="refresh" content="0; url=Javascript/index.html">
+<!-- If you wish to host the Javascript part on GitHub Pages, this file is necessary to redirect to Javascript's index.html
 Don't forget to adapt the URL to your needs!

-This file is not necessary for anything else, you may delete it if you find it useless-->
+This file is not necessary for anything else, you may delete it if you find it useless -->