require 'net/http'
require 'json'
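
# Probes TIMES randomly generated URLs over HTTP and appends every hit
# (any response, or any failure other than a DNS SocketError) to the JSON report.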
def main_loop
  json_object = []

  TIMES.times do |i|
    url = url_generator
    puts("#{url} (#{i + 1}/#{TIMES})") if LOG
    begin
      response = Net::HTTP.get_response(URI(url))
      puts("#{url} exists!")
      json_object << Hash["website_url" => url, "response_type" => "SUCCESS", "response_code" => response.code, "response_details" => response.message]
    rescue StandardError => e # unlike the Node/Python versions, few existing websites raise exceptions here
      # A SocketError means the host did not resolve; anything else (SSL error,
      # timeout, bad redirect) still implies a live site, so record it too
      if e.class != SocketError
        puts("#{url} exists!")
        json_object << Hash["website_url" => url, "response_type" => "ERROR", "response_code" => e.class.to_s, "response_details" => e.to_s]
      end
    end
  end
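
  # Append the collected results as one JSON array; skip the write when nothing was found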
  File.open(REPORT_FILE, 'a+') { |f| f << json_object.to_json } if json_object.any?
  puts("Finished at #{Time.new.hour}h #{Time.new.min}m\n")
end
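
# Builds one candidate URL: a random protocol, a host name of up to MAX random
# base-36 characters, an optional second-level domain, and a random domain.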
def url_generator
  result = PROTOCOLS[rand(0...PROTOCOLS.length)] + '://'
  url_length = rand(MIN..MAX)
  # Up to url_length random base-36 characters (leading zeros shorten the string)
  result += rand(36**url_length).to_s(36)
  result += '.' + DOMAINS[rand(0...DOMAINS.length)] if rand(1...100) <= SECOND
  result += '.' + DOMAINS[rand(0...DOMAINS.length)]
end
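
# Runtime configuration: each CLI flag overrides the matching key in ../defaults.json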
DEFAULTS  = JSON.parse(File.read("../defaults.json"))
TIMES     = ARGV.include?('-t') ? ARGV[ARGV.index('-t') + 1].to_i : DEFAULTS["times"]
PROTOCOLS = ARGV.include?('-p') ? ARGV[ARGV.index('-p') + 1].split(",") : DEFAULTS["protocols"]
DOMAINS   = ARGV.include?('-d') ? ARGV[ARGV.index('-d') + 1].split(",") : DEFAULTS["domains"]
SECOND    = ARGV.include?('-s') ? ARGV[ARGV.index('-s') + 1].to_i : DEFAULTS["second"]
LOG       = ARGV.include?('-l') ? true : DEFAULTS["log"]
MIN       = ARGV.include?('-min') ? ARGV[ARGV.index('-min') + 1].to_i : DEFAULTS["min"]
MAX       = ARGV.include?('-max') ? ARGV[ARGV.index('-max') + 1].to_i : DEFAULTS["max"]

DATE = Time.new
puts ( " \n I am going to look for websites through #{ TIMES } random URLs (min length #{ MIN } and max length #{ MAX } ) with the following domains: #{ DOMAINS } " )
2022-12-12 21:03:40 +01:00
puts ( " These URLs will use the protocols #{ PROTOCOLS } and each of those URLs have #{ SECOND } in 100 chance to have a second level domain " )
puts ( " Started at #{ DATE . hour } h #{ DATE . min } m \n " )
REPORT_FILE = "RB_report_#{DATE.day}#{DATE.hour}#{DATE.min}.json"
# Create (and close) the file up front so the report exists even when nothing is found
File.open(REPORT_FILE, 'a+') {}

main_loop
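
# Example invocation (script filename assumed; defaults.json must sit one directory up):
#   ruby websites.rb -t 500 -p http,https -d com,net,org -s 10 -min 2 -max 20 -l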