From a8bda4704ac554318a8d047885600ddc913986b4 Mon Sep 17 00:00:00 2001 From: Taevas <67872932+TTTaevas@users.noreply.github.com> Date: Sun, 11 Dec 2022 20:00:36 +0100 Subject: [PATCH] Cleaned stuff up --- C#/Program.cs | 20 +++++----- Crystal/README.md | 26 +++++++++++++ Crystal/index.cr | 35 ++++++++--------- Javascript/README.md | 38 ++++++++++++++++++ Javascript/index.html | 18 ++++----- Javascript/index.js | 26 ++++++------- Node.js/README.md | 15 ++++++++ Node.js/index.js | 12 +++--- Python/README.md | 21 ++++++++++ Python/index.py | 37 ++++++++++-------- README.md | 90 +++++++------------------------------------ Ruby/README.md | 15 ++++++++ Ruby/index.rb | 16 ++++---- index.html | 6 +-- 14 files changed, 215 insertions(+), 160 deletions(-) create mode 100644 Crystal/README.md create mode 100644 Javascript/README.md create mode 100644 Node.js/README.md create mode 100644 Python/README.md create mode 100644 Ruby/README.md diff --git a/C#/Program.cs b/C#/Program.cs index e8a5425..735c032 100644 --- a/C#/Program.cs +++ b/C#/Program.cs @@ -12,17 +12,17 @@ class WebRequests public static void Main(string[] args) { int times = Array.IndexOf(args, "-t") > -1 ? int.Parse(args[Array.IndexOf(args, "-t") + 1]) : 3000; + string[] protocols = Array.IndexOf(args, "-p") > -1 ? args[Array.IndexOf(args, "-p") + 1].Split(",") : new string[]{"http"}; string[] domains = Array.IndexOf(args, "-d") > -1 ? args[Array.IndexOf(args, "-d") + 1].Split(",") : new string[]{".co", ".com", ".net", ".edu", ".gov", ".cn", ".org", ".cc", ".us", ".mil", ".ac", ".it", ".de"}; - string[] mode = Array.IndexOf(args, "-m") > -1 ? args[Array.IndexOf(args, "-m") + 1].Split(",") : new string[]{"http"}; - bool log = Array.IndexOf(args, "-l") > -1; - int mini = Array.IndexOf(args, "-MIN") > -1 ? int.Parse(args[Array.IndexOf(args, "-MIN") + 1]) : 2; - int maxi = Array.IndexOf(args, "-MAX") > -1 ? int.Parse(args[Array.IndexOf(args, "-MAX") + 1]) : 50; int second = Array.IndexOf(args, "-s") > -1 ? 
int.Parse(args[Array.IndexOf(args, "-s") + 1]) : 1; + bool log = Array.IndexOf(args, "-l") > -1; + int min = Array.IndexOf(args, "-MIN") > -1 ? int.Parse(args[Array.IndexOf(args, "-MIN") + 1]) : 2; + int max = Array.IndexOf(args, "-MAX") > -1 ? int.Parse(args[Array.IndexOf(args, "-MAX") + 1]) : 50; DateTime time = DateTime.Now; - Console.WriteLine($"\nI am going to look for websites through {times} random URLs (min length {mini} and max length {maxi}) with the following domains: {String.Join(", ", domains)}"); - Console.WriteLine($"These URLs will use the protocols {String.Join(", ", mode)} and each of them have {second} in a 100 chance to have a second level domain."); + Console.WriteLine($"\nI am going to look for websites through {times} random URLs (min length {min} and max length {max}) with the following domains: {String.Join(", ", domains)}"); + Console.WriteLine($"These URLs will use the protocols {String.Join(", ", protocols)} and each of those URLs have {second} in a 100 chance to have a second level domain."); Console.WriteLine($"Started at {time.Hour}h{time.Minute}m\n"); List _data = new List(); @@ -30,7 +30,7 @@ class WebRequests for (int i = 0; i < times; i++) { - string url = RandomURL(domains, mode, mini, maxi, second); + string url = RandomURL(domains, protocols, min, max, second); if (log) Console.WriteLine($"{url} ({i+1}/{times})"); try @@ -85,12 +85,12 @@ class WebRequests } private static Random random = new Random(); - public static string RandomURL(string[] d, string[] m, int mini, int maxi, int second) + public static string RandomURL(string[] d, string[] p, int min, int max, int second) { const string chars = "abcdefghijklmnopqrstuvwyxz0123456789"; - string full_url = m[random.Next(m.Length)] + "://"; // Mode (http/https) - full_url += new string (Enumerable.Repeat(chars, random.Next(mini, maxi)) + string full_url = p[random.Next(p.Length)] + "://"; // protocols (http/https) + full_url += new string (Enumerable.Repeat(chars, 
random.Next(min, max)) .Select(s => s[random.Next(s.Length)]).ToArray()); // Domain name (abc69) full_url += d[random.Next(d.Length)]; // Top-level domain (.fr) if (random.Next(100) <= second) full_url += d[random.Next(d.Length)]; // Second-level domain (.co) diff --git a/Crystal/README.md b/Crystal/README.md new file mode 100644 index 0000000..824e1b5 --- /dev/null +++ b/Crystal/README.md @@ -0,0 +1,26 @@ +# Website-Finder: Crystal + +## REQUIREMENT + +[Crystal](https://crystal-lang.org) + +## HOW TO RUN + +Please note that using arguments with the Crystal script is slightly different from using arguments with other scripts, due to how Crystal works + +To use arguments, you will need to add " -- " (without the ") between the name of the file and the first argument + +In the Crystal script, the "-min" argument has been replaced by the "--min" argument, same for "-max" being replaced by "--max" + +For both "--min" and "--max", you'll need to use an equal sign "=" instead of a space " " before the value + +```sh +$ crystal run index.cr + +# To make the script go through 1000 URLs, each of maximum length 20: +$ crystal run index.cr -- -t 1000 --max=20 +``` + +## OTHER STUFF + +More details are available on [the readme in the root folder](../README.md) diff --git a/Crystal/index.cr b/Crystal/index.cr index 707fa39..20edcd1 100644 --- a/Crystal/index.cr +++ b/Crystal/index.cr @@ -3,12 +3,12 @@ require "http" require "uri" require "json" -def main_loop(times, domains, mode, log, mini, maxi, second, report_file) +def main_loop(times, domains, protocols, log, min, max, second, report_file) json_text = JSON.build do |json| json.array do i = 0 while i < times - url = url_generator(domains, mode, mini.as(UInt8), maxi.as(UInt8), second) + url = url_generator(domains, protocols, min.as(UInt8), max.as(UInt8), second) puts "#{url} (#{i + 1}/#{times})" if log client = HTTP::Client.new(URI.parse url) client.connect_timeout = 40.seconds @@ -18,7 +18,7 @@ def main_loop(times, 
domains, mode, log, mini, maxi, second, report_file) json.object do json.field "website_url", url json.field "response_type", "SUCCESS" - json.field "response_code", response.status_code + json.field "response_code", "#{response.status_code}" json.field "response_details", HTTP::Status.new(response.status_code) end rescue e : Socket::Addrinfo::Error @@ -44,10 +44,10 @@ def main_loop(times, domains, mode, log, mini, maxi, second, report_file) puts "\nFinished at #{end_date.hour}h#{end_date.minute}m" end -def url_generator(domains, mode, mini, maxi, second) +def url_generator(domains, protocols, min, max, second) result = String.build do |str| - str << mode[Random.rand(mode.size)] + "://" - url_length = Random.rand(mini..maxi) + str << protocols[Random.rand(protocols.size)] + "://" + url_length = Random.rand(min..max) characters = "abcdefghijklmnopqrstuvwxyz0123456789" i = 0 while i < url_length @@ -57,17 +57,16 @@ def url_generator(domains, mode, mini, maxi, second) str << domains[Random.rand(domains.size)] if Random.rand(1..100) <= second str << domains[Random.rand(domains.size)] end - puts result result end times = UInt32.new "3000" +protocols = ["http"] domains = [".co", ".com", ".net", ".edu", ".gov", ".cn", ".org", ".cc", ".us", ".mil", ".ac", ".it", ".de"] -mode = ["http"] -log = false -mini = UInt8.new "3" -maxi = UInt8.new "50" second = UInt8.new "1" +log = false +min = UInt8.new "2" +max = UInt8.new "50" OptionParser.parse do |parser| parser.banner = "Website-Finder" @@ -77,18 +76,18 @@ OptionParser.parse do |parser| end parser.on("-t TIMES", "--times=TIMES", "Number of requests / DEFAULT: #{times}") {|p_times| times = p_times.to_u32} parser.on("-d DOMAINS", "--domains=DOMAINS", "Domains used in URLS, like: .com,.net,.gov / DEFAULT: #{domains}") {|p_domains| domains = p_domains.split(",")} - parser.on("-m MODE", "--modes=MODES", "You may choose between: http | https | http,https / DEFAULT: #{mode}") {|p_modes| mode = p_modes.split(",")} + parser.on("-p 
protocols", "--protocols=PROTOCOLS", "You may choose between: http | https | http,https / DEFAULT: #{protocols}") {|p_protocols| protocols = p_protocols.split(",")} parser.on("-l", "--log", "Log all requests / DEFAULT: #{log}") {log = true} - parser.on("", "--MIN=LENGTH", "Minimum length of URLs / DEFAULT: #{mini}") {|p_length| mini = p_length.to_u8} - parser.on("", "--MAX=LENGTH", "Maximum length of URLs / DEFAULT: #{maxi}") {|p_length| maxi = p_length.to_u8} + parser.on("", "--min=LENGTH", "Minimum length of URLs / DEFAULT: #{min}") {|p_length| min = p_length.to_u8} + parser.on("", "--max=LENGTH", "Maximum length of URLs / DEFAULT: #{max}") {|p_length| max = p_length.to_u8} parser.on("-s SECOND", "--second=SECOND", "Likelihood of a URL featuring a second-level domain / DEFAULT: #{second}") {|p_second| second = p_second.to_u8} end date = Time.local -puts "\nI am going to look for websites through #{times} random URLs (min length #{mini} and max length #{maxi} with the following domains: #{domains}" -puts "These URLs will use the protocols #{mode}" -puts "and each of them have #{second} in a 100 chance to have a second level domain." +puts "\nI am going to look for websites through #{times} random URLs (min length #{min} and max length #{max} with the following domains: #{domains}" +puts "These URLs will use the protocols #{protocols}" +puts "and each of those URLs have #{second} in a 100 chance to have a second level domain." 
puts "Started at #{date.hour}h#{date.minute}m\n" report_file = "CR_report_#{date.day}#{date.hour}#{date.minute}.json" -main_loop(times, domains, mode, log, mini, maxi, second, report_file) +main_loop(times, domains, protocols, log, min, max, second, report_file) diff --git a/Javascript/README.md b/Javascript/README.md new file mode 100644 index 0000000..ea86f15 --- /dev/null +++ b/Javascript/README.md @@ -0,0 +1,38 @@ +# Website-Finder: Javascript + +While this is called Javascript, it also makes use of HTML (and CSS, somewhat) + +## HOW TO RUN + +It is already being run by [GitHub Pages](https://tttaevas.github.io/Website-Finder/Javascript/index.html), but you may run it yourself simply by opening [index.html](./index.html) in your web browser + +Do note that using arguments and launching the search is done through the HTML, so you cannot use the cli/terminal to use arguments, and opening the file will not instantly trigger the search + +```sh +# You should be able to double-click the file or drag the file to the web browser, but if you wanna be fancy + +# Linux +$ xdg-open index.html + +# Windows 10 +$ explorer index.html + +# macOS +$ open index.html +``` + +## REGARDING REPORTS + +Your web browser should be unable to create files on your computer, so unlike other scripts, no report in json is made + +Instead, a report is created in real time within the HTML, while the logging is done by the browser in its development tools + +## REGARDING ARGUMENTS + +The GitHub Pages version cannot use HTTP due to [Mixed Content](https://developer.mozilla.org/en-US/docs/Web/Security/Mixed_content) + +The default arguments may not match the ones specified in [defaults.json](../defaults.json), as they have to be independent from each other + +## OTHER STUFF + +More details are available on [the readme in the root folder](../README.md) diff --git a/Javascript/index.html b/Javascript/index.html index 060dc30..2ea7dfd 100644 --- a/Javascript/index.html +++ b/Javascript/index.html 
@@ -12,18 +12,18 @@
- - - - - + - Please note that http requests (not https) will not work if the file is not run locally, putting http in that field is useless if you're using GitHub Pages - + Please note that http requests (not https) will not work if the page is loaded on a HTTPS website, putting http in that field is useless if you're using GitHub Pages + + + + + - + - +

STATUS: STOPPED

COUNT:

diff --git a/Javascript/index.js b/Javascript/index.js index 7ad17cd..c384c2d 100644 --- a/Javascript/index.js +++ b/Javascript/index.js @@ -3,7 +3,7 @@ function findWebsites() { async function main_loop() { for (let i = 0; i < times; i++) { count.innerHTML = `COUNT: ${i+1}/${times}` - const url = await url_generator() + const url = url_generator() url_show.innerHTML = `CHECKING: ${url}` try { @@ -25,23 +25,23 @@ function findWebsites() { } function url_generator() { - let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://" + let result = protocols[Math.round(Math.random() * (protocols.length - 1))] + "://" + const url_length = Math.floor(Math.random() * (max - min) + min) const characters = "abcdefghijklmnopqrstuvwxyz0123456789" - const url_length = Math.floor(Math.random() * (maxi - mini) + mini) for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))} - result += domains[Math.floor(Math.random() * domains.length)] + result += `.${domains[Math.floor(Math.random() * domains.length)]}` if (Math.floor(Math.random() * (100 - 1) + 1) <= second) result += domains[Math.floor(Math.random() * domains.length)] return result } const audio = new Audio("found.mp3") - const times = document.getElementById("times").value ? Math.round(Number(document.getElementById("times").value)) : 3000 - const domains = document.getElementById("domains").value ? document.getElementById("domains").value.split(", ") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'] - const second = document.getElementById("second").value ? Math.round(Number(document.getElementById("second").value)) : 1 - const mode = document.getElementById("mode").value ? document.getElementById("mode").value.split(", ") : ['https'] - const mini = document.getElementById("mini").value ? Math.round(Number(document.getElementById("mini").value)) : 2 - const maxi = document.getElementById("maxi").value ? 
Math.round(Number(document.getElementById("maxi").value)) : 50 + const times = document.getElementById("times").value ? Math.round(Number(document.getElementById("times").value)) : 2000 + const protocols = document.getElementById("protocols").value ? document.getElementById("protocols").value.split(", ") : ['https'] + const domains = document.getElementById("domains").value ? document.getElementById("domains").value.split(", ") : ["com", "org", "net", "tk", "cn", "de", "ru", "uk", "nl", "ca", "au", "in", "ir", "tv", "live", "gov", "edu"] + const second = document.getElementById("second").value ? Math.round(Number(document.getElementById("second").value)) : 0 + const min = document.getElementById("min").value ? Math.round(Number(document.getElementById("min").value)) : 2 + const max = document.getElementById("max").value ? Math.round(Number(document.getElementById("max").value)) : 15 const list = document.getElementsByTagName("UL")[0] const status = document.getElementsByTagName("P")[0] @@ -52,9 +52,9 @@ function findWebsites() { console.log('Number of URLs being checked:', times) console.log('Domains used in URLs:', domains) console.log('How many URLs out of 100 will feature two domains:', second) - console.log('Application protocols used by URLs:', mode) - console.log('Minimum length of URLs:', mini) - console.log('Maximum length of URLs:', maxi) + console.log('Application protocols used by URLs:', protocols) + console.log('Minimum length of URLs:', min) + console.log('Maximum length of URLs:', max) status.innerHTML = "STATUS: ACTIVE" diff --git a/Node.js/README.md b/Node.js/README.md new file mode 100644 index 0000000..f5bef31 --- /dev/null +++ b/Node.js/README.md @@ -0,0 +1,15 @@ +# Website-Finder: Node.js + +## REQUIREMENT + +[Node.js](https://nodejs.org) + +## HOW TO RUN + +```sh +$ node index.js +``` + +## OTHER STUFF + +More details are available on [the readme in the root folder](../README.md) diff --git a/Node.js/index.js b/Node.js/index.js index 
10b27c0..1de90f1 100644 --- a/Node.js/index.js +++ b/Node.js/index.js @@ -23,7 +23,7 @@ async function main_loop() { } function url_generator(num_url, times) { - let result = mode[Math.round(Math.random() * (mode.length - 1))] + "://" + let result = protocols[Math.round(Math.random() * (protocols.length - 1))] + "://" const characters = "abcdefghijklmnopqrstuvwxyz0123456789" const url_length = Math.floor(Math.random() * (maxi - mini) + mini) for (let i = 0; i < url_length; i++) {result += characters.charAt(Math.floor(Math.random() * characters.length))} @@ -50,19 +50,19 @@ function fetch(url, options = {}) { } const times = process.argv.indexOf('-t') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-t') + 1])) : 3000 +const protocols = process.argv.indexOf('-p') > -1 ? process.argv[process.argv.indexOf('-p') + 1].split(',') : ['http'] const domains = process.argv.indexOf('-d') > -1 ? process.argv[process.argv.indexOf('-d') + 1].split(',') : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'] -const mode = process.argv.indexOf('-m') > -1 ? process.argv[process.argv.indexOf('-m') + 1].split(',') : ['http'] -const log = process.argv.indexOf('-l') > -1 -const mini = process.argv.indexOf('-MIN') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MIN') + 1])) : 2 -const maxi = process.argv.indexOf('-MAX') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-MAX') + 1])) : 50 const second = process.argv.indexOf('-s') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-s') + 1])) : 1 +const log = process.argv.indexOf('-l') > -1 +const mini = process.argv.indexOf('-min') > -1 ? Math.round(Number(process.argv[process.argv.indexOf('-min') + 1])) : 2 +const maxi = process.argv.indexOf('-max') > -1 ? 
Math.round(Number(process.argv[process.argv.indexOf('-max') + 1])) : 50 const report_file = "JS_report_" + String(new Date().getUTCDate()) + String(new Date().getHours()) + String(new Date().getMinutes()) + ".json" process.stdout.write(`\nI am going to look for websites through ${times} random URLs (min length ${mini} and max length ${maxi}) with the following domains: `) console.log(domains) process.stdout.write("These URLs will use the protocols ") -console.log(mode) +console.log(protocols) console.log(`and each of them have ${second} in a 100 chance to have a second level domain.`) console.log('Started at ' + String(new Date().getHours()) + 'h' + String(new Date().getMinutes()) + 'm\n') diff --git a/Python/README.md b/Python/README.md new file mode 100644 index 0000000..909a066 --- /dev/null +++ b/Python/README.md @@ -0,0 +1,21 @@ +# Website-Finder: Python + +This is specifically for Python 3.8+, this has not been tested for other versions + +## REQUIREMENT + +[Python](https://www.python.org/) + +## HOW TO RUN + +```sh +$ python index.py +``` + +```sh +$ python3 index.py +``` + +## OTHER STUFF + +More details are available on [the readme in the root folder](../README.md) diff --git a/Python/index.py b/Python/index.py index 6801fc5..8aca1e0 100644 --- a/Python/index.py +++ b/Python/index.py @@ -1,30 +1,34 @@ import sys import random import datetime -import urllib.request +import requests + +from requests.packages.urllib3.exceptions import InsecureRequestWarning +requests.packages.urllib3.disable_warnings(InsecureRequestWarning) def main_loop(): json_object = [] for i in range(times): url = url_generator(i, times) try: - response = urllib.request.urlopen(url) + response = requests.get(url, verify=False, timeout=40) print(url + " exists!") - json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.getcode()) + '","response_details":"Server seems to be ' + str(response.info()["Server"]) + '"}') - except Exception 
as e: - if "[Errno 11001]" in str(e): continue - print(url + " exists!") - err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND" - json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "") + '"}') + json_object.append('{"website_url":"' + url + '","response_type":"SUCCESS","response_code":"' + str(response.status_code) + '","response_details":"' + str(response.reason) + '"}') + except Exception as e: # Exception should always be ConnectionError (**usually** bad) or ReadTimeout (good) + # Exception handling seems to be a pain because most errors return ConnectionError, so ConnectionError in itself can mean the website exists OR the website does NOT exist + if "not known" not in str(e).lower() and "no address" not in str(e).lower(): + print(url + " exists!") + err_code = str(e)[str(e).index("[") + 1:str(e).index("]")] if "[" in str(e) and "]" in str(e) else "NO CODE FOUND" + json_object.append('{"website_url":"' + url + '","response_type":"ERROR","response_code":"' + err_code + '","response_details":"' + str(e).replace("\\", "").replace('"', "") + '"}') f.write(str(json_object).replace("'", "").replace("\\", "")) f.close() print("Finished at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m") def url_generator(num_url, times): - result = mode[random.randint(0, len(mode) - 1)] + "://" + result = protocols[random.randint(0, len(protocols) - 1)] + "://" characters = "abcdefghijklmnopqrstuvwxyz0123456789" - url_length = random.randint(mini, maxi) + url_length = random.randint(min, max) result += ''.join(random.choice(characters) for i in range(url_length)) result += domains[random.randint(0, len(domains) - 1)] if random.randint(1, 100) <= second: result += domains[random.randint(0, len(domains) - 1)] @@ -32,15 +36,16 @@ def url_generator(num_url, times): return result times = 
int(sys.argv[sys.argv.index('-t') + 1]) if '-t' in sys.argv else 3000 +protocols = sys.argv[sys.argv.index('-p') + 1].split(",") if '-p' in sys.argv else ['http'] domains = sys.argv[sys.argv.index('-d') + 1].split(",") if '-d' in sys.argv else ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'] -mode = sys.argv[sys.argv.index('-m') + 1].split(",") if '-m' in sys.argv else ['http'] -log = '-l' in sys.argv -mini = int(sys.argv[sys.argv.index('-MIN') + 1]) if '-MIN' in sys.argv else 2 -maxi = int(sys.argv[sys.argv.index('-MAX') + 1]) if '-MAX' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful! second = int(sys.argv[sys.argv.index('-s') + 1]) if '-s' in sys.argv else 1 +log = '-l' in sys.argv +# lmao what if we literally get rid of two built-in functions +min = int(sys.argv[sys.argv.index('-min') + 1]) if '-min' in sys.argv else 2 +max = int(sys.argv[sys.argv.index('-max') + 1]) if '-max' in sys.argv else 50 # Python cannot look for URLs longer than 50ish, so be careful! 
-print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(mini) + " and max length " + str(maxi) + ") with the following domains: " + str(domains)) -print("These URLs will use the protocols " + str(mode) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.") +print("\nI am going to look for websites through " + str(times) + " random URLs (min length " + str(min) + " and max length " + str(max) + ") with the following domains: " + str(domains)) +print("These URLs will use the protocols " + str(protocols) + " and each of those URLs have " + str(second) + " in 100 chance to have a second level domain.") print("Started at " + str(datetime.datetime.now().time())[0:5].replace(":", "h") + "m\n") f = open("PY_report_" + str(datetime.datetime.now().strftime("%d%H%M")) + ".json", "a+") diff --git a/README.md b/README.md index 68ecaca..88d3459 100644 --- a/README.md +++ b/README.md @@ -1,89 +1,25 @@ # Website-Finder -Website-Finder is a collection of light scripts written in various programming languages without the need for external libraries that finds websites of all sorts for you and make reports of that either in the form of automatically generated json files or in the form of a webpage. - -Keep in mind that this software will find ANY website that exists, no matter how morally wrong it may be. It may also (on purpose) find websites which are hosted by a server that simply doesn't reply to requests. - -## REQUIREMENTS - -Each script has its own requirements. 
- -* index.py, the Python script, requires [Python 3](https://www.python.org/downloads/) -* index.js, the Node.js script, requires [Node.js](https://nodejs.org/en/download/) -* index.rb, the Ruby script, requires [Ruby](https://rubyinstaller.org/downloads/) -* index.html, which runs a Javascript script within a HTML webpage, only requires a web browser supporting [JS](https://developer.mozilla.org/en-US/docs/Learn/JavaScript/First_steps/What_is_JavaScript) -* Program.cs, the C# script, requires [.NET SDK](https://dotnet.microsoft.com/download) **to be built** - -An already built C# script for Windows (x64) that doesn't have any requirement is available in this repository's releases. (C#.exe in win-x64) - -## HOW TO RUN - -You can run the Python, the (built) C# script or the Ruby script by simply double clicking on it or going into the command-line, moving into the right directory and entering the file name. - -To run the Node.js script or the (unbuilt) C# script, you will have to use the command-line. - -```sh -$ cd Website-Finder/Node.js -$ node index.js -``` -```sh -$ cd Website-Finder/C# -$ dotnet run -``` - -For the Javascript script, you can: - -* Run the HTML file into your web browser, by either double-clicking it or by dragging the file into it -* Visit that same file hosted on [GitHub Pages](https://tttaevas.github.io/Website-Finder/Javascript/index.html) - -Please keep in mind that **any website that this software finds can contain dangerous resources, therefore please proceed with caution, [update your web browser](https://www.whatismybrowser.com/), use an [adblocker](https://ublockorigin.com/) and [check the URL](https://www.virustotal.com/gui/home/url) for anything dangerous.** I am not responsible for anything my software finds for you. 
+Website-Finder is a collection of light scripts written in various programming languages without the need for external libraries that finds websites of all sorts for you and automatically makes reports of that, usually in the form of a json file ## ARGUMENTS -You can use arguments by launching the scripts through the command-line. +You can use arguments when launching scripts through the cli/terminal -- "-t" defines the number of URLs the script will go through. -- "-d" defines all the top-level domains the URLs will use, separated only by a ",". -- "-m" defines the application protocol used. Multiple protocols can be defined by separating them with a ",". -- "-l" defines by whether or not it is present whether or not all URLs will be logged in the command-line. -- "-s" defines how likely it will be that the URLs feature a second level domain, <=0 being impossible and >=100 being always. -- "-MIN" defines the minimum length of the URLs. -- "-MAX" defines the maximul length of the URLs. +- "-t" (times) defines the number of URLs the script will go through +- "-p" (protocols) defines the application protocol used, multiple protocols can be defined by separating them with a "," +- "-d" (domains) defines all the top-level and second-level domains the URLs will use, separated only by a "," +- "-s" (second) defines how likely it will be that the URLs feature a second-level domain, 0 being impossible and 100 being always +- "-l" (log) will make all URLs be logged in the cli/terminal if it's present +- "-min" (minimum) defines the minimum length of the URLs +- "-max" (maximum) defines the maximul length of the URLs -* "-t" defaults to 3000. -* "-d" defaults to a lot of popular top-level domains. -* "-m" defaults to "http", but the Javascript script defaults to "https" due to [requests made with the "http" application protocol being blocked when not run locally](https://developer.mozilla.org/en-US/docs/Web/Security/Mixed_content). -* "-l" is off by default. 
-* "-s" defaults to 1. -* "-MIN" defaults to 2. -* "-MAX" defaults to 50. - -Using arguments with the Javascript script is simple, as you can enter values in labeled fields. Leaving those fields empty will make the script use the default values. - -```sh -# To make the Python script go through 3000 URLs in HTTP with various top-level domains without logging: -$ index.py - -# To make the Ruby script go through 500 URLs of min length 5 and max length 7 in HTTP and HTTPS with only the .com and .fr top-level domains with a 30% chance for each URL to feature a second level domain with logging: -$ index.rb -MAX 7 -t 500 -MIN 5 -m http,https -l -s 30 -d .com,.fr - -# To make the Node.js script go through 3000 URLs in HTTPS with various top-level domains with logging: -$ node index.js -m https -l - -# To make the (built) C# script go through 100 URLs with the .com top-level domains with a 0% chance for each URL to feature a second level domain without logging: -$ C#.exe -d .com -s 0 -``` +Default values can be found in [defaults.json](./defaults.json) ## REPORTS -Once a script is done running, it will generate a .json report in its folder. +Once a script is done running, it will fill a .json report in its directory -The Javascript script generates the report in real-time on the webpage, in the black box on the right side of the screen. +## OTHER STUFF -## FAQ - -Q: Is there a script that is better than the other? -A: As far as I am aware, nope! However, the reports are generated differently depending of the script and some websites send different codes depending of the script. - -Q: Why does the "-m" argument default to "http" rather than "https"? -A: Requests in "http" receive more status codes than error codes compared to "https". I suspect it's because some websites don't support "https" very well, even in the current year. 
+For information exclusive to a script, read the README.md file in its directory diff --git a/Ruby/README.md b/Ruby/README.md new file mode 100644 index 0000000..5529dc8 --- /dev/null +++ b/Ruby/README.md @@ -0,0 +1,15 @@ +# Website-Finder: Ruby + +## REQUIREMENT + +Ruby ([Windows Installer](https://rubyinstaller.org/downloads)) ([Snap Store for Linux](https://snapcraft.io/ruby)) + +## HOW TO RUN + +```sh +$ ruby index.rb +``` + +## OTHER STUFF + +More details are available on [the readme in the root folder](../README.md) diff --git a/Ruby/index.rb b/Ruby/index.rb index f18ff02..fe51f1b 100644 --- a/Ruby/index.rb +++ b/Ruby/index.rb @@ -22,25 +22,25 @@ def main_loop end def url_generator() - result = MODE[rand(0...MODE.length)] + '://' - url_length = rand(MINI..MAXI) + result = PROTOCOLS[rand(0...PROTOCOLS.length)] + '://' + url_length = rand(MIN..MAX) result += rand(36 ** url_length).to_s(36) result += DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND result += DOMAINS[rand(0...DOMAINS.length)] end TIMES = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : 3000 +PROTOCOLS = ARGV.include?('-p') ? ARGV[ARGV.index('-p') + 1].split(",") : ['http'] DOMAINS = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : ['.co', '.com', '.net', '.edu', '.gov', '.cn', '.org', '.cc', '.us', '.mil', '.ac', '.it', '.de'] -MODE = ARGV.include?('-m') ? ARGV[ARGV.index('-m') + 1].split(",") : ['http'] -LOG = ARGV.index('-l').class == Integer -MINI = ARGV.include?('-MIN') ? ARGV[ARGV.index('-MIN') + 1].to_i : 2 -MAXI = ARGV.include?('-MAX') ? ARGV[ARGV.index('-MAX') + 1].to_i : 50 SECOND = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : 1 +LOG = ARGV.index('-l').class == Integer +MIN = ARGV.include?('-min') ? ARGV[ARGV.index('-min') + 1].to_i : 2 +MAX = ARGV.include?('-max') ? 
ARGV[ARGV.index('-max') + 1].to_i : 50 REPORT_FILE = "RB_report_#{Time.new.day}#{Time.new.hour}#{Time.new.min}.json" -puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}") -puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.") +puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MIN} and max length #{MAX}) with the following domains: #{DOMAINS}") +puts("These URLs will use the protocols #{PROTOCOLS} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.") puts("Started at #{Time.new.hour}h#{Time.new.min}m\n") File.open(REPORT_FILE, 'a+') diff --git a/index.html b/index.html index 1985242..d00c805 100644 --- a/index.html +++ b/index.html @@ -1,5 +1,5 @@ - - +This file is not necessary for anything else, you may delete it if you find it useless -->