2020-07-15 19:28:36 +02:00
require 'net/http'
2020-07-21 00:00:09 +02:00
require 'json'
2020-07-15 19:28:36 +02:00
# Probes TIMES random URLs and records every one that appears to exist.
#
# For each URL returned by url_generator an HTTP GET is attempted:
# * any HTTP response is recorded as "SUCCESS" with its status code and message;
# * a SocketError (or any subclass of it) is treated as "no website here" and
#   the URL is skipped;
# * any other StandardError is recorded as "ERROR" with the exception class
#   name and message.
#
# The collected entries are appended to REPORT_FILE as one JSON array (nothing
# is written when no entry was collected), then the finish time is printed.
#
# Interrupt, SystemExit and other non-StandardError exceptions are not rescued,
# so the run can be stopped with Ctrl-C.
def main_loop
  found = []
  TIMES.times do |i|
    url = url_generator
    puts("#{url} (#{i + 1}/#{TIMES})") if LOG
    begin
      response = Net::HTTP.get_response(URI(url))
      puts("#{url} exists!")
      found << {
        'website_url' => url,
        'response_type' => 'SUCCESS',
        'response_code' => response.code,
        'response_details' => response.message
      }
    rescue SocketError
      # Matching with `rescue` also covers SocketError subclasses (for example
      # Socket::ResolutionError on Ruby 3.3+), which an exact class comparison
      # would wrongly report as an existing website.
      next
    rescue StandardError => e
      # Unlike Node/PY, the number of existing websites that raise exceptions is small
      puts("#{url} exists!")
      found << {
        'website_url' => url,
        'response_type' => 'ERROR',
        'response_code' => e.class.to_s,
        'response_details' => e.to_s
      }
    end
  end
  File.open(REPORT_FILE, 'a+') { |f| f << found.to_json } if found.any?
  # Read the clock once so the hour and minute belong to the same instant.
  now = Time.new
  puts("Finished at #{now.hour}h#{now.min}m\n")
end
2020-07-24 01:37:47 +02:00
# Builds one random URL to probe.
#
# The result is "<protocol>://<name><domain>", or with a SECOND-in-100 chance
# "<protocol>://<name><domain><domain>" (a second-level domain), where:
# * <protocol> is a random element of MODE;
# * <name> is a random string of base-36 characters (0-9, a-z) whose length is
#   drawn uniformly from MINI..MAXI;
# * each <domain> is a random element of DOMAINS.
#
# @return [String] the generated URL
def url_generator
  name_length = rand(MINI..MAXI)
  # One base-36 digit per character: converting a single large random number
  # would drop leading zeros and could yield a name shorter than name_length.
  name = Array.new(name_length) { rand(36).to_s(36) }.join
  url = "#{MODE.sample}://#{name}"
  # 1..100 is inclusive on both ends, so SECOND is a true "out of 100" chance.
  url += DOMAINS.sample if rand(1..100) <= SECOND
  url + DOMAINS.sample
end
2020-07-24 01:37:47 +02:00
# Returns the argument that follows +flag+ in +argv+, or nil when the flag is
# absent or is the last argument (no value was supplied for it).
def cli_option_value(flag, argv = ARGV)
  position = argv.index(flag)
  position && argv[position + 1]
end

# Command-line configuration. A flag given without a value falls back to its
# default instead of crashing (-d, -m) or silently becoming 0 (numeric flags).

# -t <n>: number of random URLs to try.
TIMES = (cli_option_value('-t') || 3000).to_i
# -d <a,b,...>: comma-separated domain suffixes to append to generated names.
DOMAINS = (cli_option_value('-d')&.split(',') ||
           %w[.co .com .net .edu .gov .cn .org .cc .us .mil .ac .it .de]).freeze
# -m <a,b,...>: comma-separated protocols to use.
MODE = (cli_option_value('-m')&.split(',') || ['http']).freeze
# -l: log every URL that is tried.
LOG = ARGV.include?('-l')
# -MIN <n> / -MAX <n>: bounds for the length of the generated name.
MINI = (cli_option_value('-MIN') || 2).to_i
MAXI = (cli_option_value('-MAX') || 50).to_i
# -s <n>: chance, out of 100, that a URL gets a second-level domain.
SECOND = (cli_option_value('-s') || 1).to_i
2020-07-15 19:28:36 +02:00
2020-07-21 00:00:09 +02:00
# Sample the clock once so the report name and the start banner are built from
# the same instant (separate Time.new calls could straddle a minute boundary).
started_at = Time.new

# JSON report for this run, named after the day, hour and minute of start-up.
REPORT_FILE = "RB_report_#{started_at.day}#{started_at.hour}#{started_at.min}.json"

puts("\nI am going to look for websites through #{TIMES} random URLs (min length #{MINI} and max length #{MAXI}) with the following domains: #{DOMAINS}")
puts("These URLs will use the protocols #{MODE} and each of those URLs have #{SECOND} in 100 chance to have a second level domain.")
puts("Started at #{started_at.hour}h#{started_at.min}m\n")

# Create the report file up front. The block form closes the handle as soon as
# the (empty) block returns, so no file descriptor is left open during the run.
File.open(REPORT_FILE, 'a+') {}
main_loop