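# Website-Finder: generates random URLs, sends an HTTP GET request to each,
# and records every URL that appears to exist in a JSON report file.
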
require " option_parser "
require " http "
require " uri "
require " json "
def main_loop(times, domains, protocols, log, min, max, second, report_file)
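  # Probes `times` random URLs and streams one JSON object per URL that
  # appears to exist; DNS failures are skipped silently.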
  json_text = JSON.build do |json|
    json.array do
      i = 0
      while i < times
        url = url_generator(domains, protocols, min, max, second)
puts " #{ url } ( #{ i + 1 } / #{ times } ) " if log
client = HTTP :: Client . new ( URI . parse url )
client . connect_timeout = 40 . seconds
begin
response = client . get ( " / " )
puts " #{ url } exists! "
json . object do
json . field " website_url " , url
json . field " response_type " , " SUCCESS "
            json.field "response_code", "#{response.status_code}"
            json.field "response_details", HTTP::Status.new(response.status_code)
          end
        rescue Socket::Addrinfo::Error
          # DNS resolution failed: the website essentially does not exist
        rescue err : Socket::Error | IO::TimeoutError
          # The name resolved but the connection failed, so the website essentially does exist
          puts "#{url} exists!"
          json.object do
            json.field "website_url", url
            json.field "response_type", "ERROR"
            json.field "response_code", "UNKNOWN" # no way to get a status code out of an exception
            json.field "response_details", err.message
          end
        ensure
          i += 1
        end
      end
    end
  end
  File.write(report_file, json_text)
  end_date = Time.local
  puts "\nFinished at #{end_date.hour}h#{end_date.minute}m"
end
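
# Builds one random URL: a protocol, a random lowercase alphanumeric name of
# length min..max, an optional second-level domain, and a top-level domain.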
def url_generator(domains, protocols, min, max, second)
  result = String.build do |str|
str << " #{ protocols [ Random . rand ( protocols . size ) ] } :// "
    url_length = Random.rand(min..max)
    characters = "abcdefghijklmnopqrstuvwxyz0123456789"
    i = 0
    while i < url_length
      # Random.rand(n) returns 0...n, so every character (including "9") is reachable
      str << characters[Random.rand(characters.size)]
      i += 1
    end
str << " . #{ domains [ Random . rand ( domains . size ) ] } " if Random . rand ( 1 .. 100 ) <= second
str << " . #{ domains [ Random . rand ( domains . size ) ] } "
  end
  result
end
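
# Load default settings from ../defaults.json. A plausible shape for that file
# (these values are illustrative guesses, not the shipped defaults):
#   {"times": 3000, "protocols": ["http"], "domains": ["com", "net"],
#    "second": 1, "log": false, "min": 2, "max": 50}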
defaults = JSON.parse(File.read("../defaults.json"))
times = defaults["times"].as_i
protocols = defaults["protocols"].as_a
domains = defaults["domains"].as_a
second = defaults["second"].as_i
log = defaults["log"].as_bool
min = defaults["min"].as_i
max = defaults["max"].as_i
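
# Command-line flags override the defaults loaded above.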
OptionParser.parse do |parser|
  parser.banner = "Website-Finder"
  parser.on("-h", "--help", "Show help") do
    puts parser
    exit
  end
  parser.on("-t TIMES", "--times=TIMES", "Number of requests / DEFAULT: #{times}") { |p_times| times = p_times.to_i }
  parser.on("-d DOMAINS", "--domains=DOMAINS", "Domains used in URLs, like: .com,.net,.gov / DEFAULT: #{domains}") { |p_domains| domains = p_domains.split(",") }
  parser.on("-p PROTOCOLS", "--protocols=PROTOCOLS", "You may choose between: http | https | http,https / DEFAULT: #{protocols}") { |p_protocols| protocols = p_protocols.split(",") }
  parser.on("-l", "--log", "Log all requests / DEFAULT: #{log}") { log = true }
  parser.on("--min=LENGTH", "Minimum length of URLs / DEFAULT: #{min}") { |p_length| min = p_length.to_i }
  parser.on("--max=LENGTH", "Maximum length of URLs / DEFAULT: #{max}") { |p_length| max = p_length.to_i }
  parser.on("-s SECOND", "--second=SECOND", "Likelihood (out of 100) of a URL featuring a second-level domain / DEFAULT: #{second}") { |p_second| second = p_second.to_i }
end
date = Time.local
puts " \n I am going to look for websites through #{ times } random URLs (min length #{ min } and max length #{ max } with the following domains: #{ domains } "
puts " These URLs will use the protocols #{ protocols } "
puts " and each of those URLs have #{ second } in a 100 chance to have a second level domain "
puts " Started at #{ date . hour } h #{ date . minute } m \n "
report_file = " CR_report_ #{ date . day } #{ date . hour } #{ date . minute } .json "
main_loop(times, domains, protocols, log, min, max, second, report_file)
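
# Sample invocation (the source file name below is illustrative):
#   crystal run website_finder.cr -- -t 500 -p https -d com,net --min=3 --max=12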