1
+ import os
2
+ import requests
3
+ import hashlib
4
+ import pandas as pd
5
+
6
class virtual_host_scanner(object):
    """Virtual host scanning class.

    Virtual host scanner has the following properties:

    Attributes:
        wordlist: location of a wordlist file to use with scans
        target: the target for scanning
        port: the port to scan; defaults to 80
        ignore_http_codes: comma-separated list of HTTP codes to ignore
        ignore_content_length: integer value of content length to ignore
        output: folder to write the output file to
    """

    def __init__(self, target, output, port=80, unique_depth=1,
                 ignore_http_codes='404', ignore_content_length=0,
                 wordlist="./wordlists/virtual-host-scanning.txt"):
        """Configure a scan of *target*; results file goes under *output*."""
        self.target = target
        self.output = output + '/' + target + '_virtualhosts.txt'
        self.port = port
        # Normalise "404, 403"-style input into a list of ints.
        self.ignore_http_codes = list(map(int, ignore_http_codes.replace(' ', '').split(',')))
        # Cast defensively: CLI layers frequently pass these through as strings,
        # and both values are compared numerically later on.
        self.ignore_content_length = int(ignore_content_length)
        self.wordlist = wordlist
        self.unique_depth = int(unique_depth)

        self.completed_scan = False
        # Each entry is "<hostname>,<sha256-of-body>"; consumed by likely_matches().
        self.results = []

    def scan(self):
        """Probe every candidate hostname in the wordlist against the target.

        Prints each hit (and its response headers) and records a
        "hostname,page_hash" entry in self.results for later analysis.
        Returns early if the wordlist file does not exist.
        """
        print("[+] Starting virtual host scan for %s using port %s and wordlist %s" % (self.target, str(self.port), self.wordlist))
        print("[>] Ignoring HTTP codes: %s" % (self.ignore_http_codes))
        if self.ignore_content_length > 0:
            print("[>] Ignoring Content length: %s" % (self.ignore_content_length))

        if not os.path.exists(self.wordlist):
            print("[!] Wordlist %s doesn't exist, ending scan." % self.wordlist)
            return

        # Use a context manager so the wordlist handle is closed promptly
        # instead of leaking until garbage collection.
        with open(self.wordlist) as wordlist_file:
            virtual_host_list = wordlist_file.read().splitlines()

        for virtual_host in virtual_host_list:
            # Wordlist entries may embed %s as a placeholder for the target.
            hostname = virtual_host.replace('%s', self.target)

            headers = {
                # int() so a string port ("80") is treated the same as 80,
                # matching the int() cast used for dest_url below.
                'Host': hostname if int(self.port) == 80 else '{}:{}'.format(hostname, self.port),
                'Accept': '*/*'
            }

            # todo: to be made redundant/replaced with a --ssl flag? Current implementation limits ssl severely
            dest_url = '{}://{}:{}/'.format('https' if int(self.port) == 443 else 'http', self.target, self.port)

            try:
                res = requests.get(dest_url, headers=headers, verify=False)
            except requests.exceptions.RequestException:
                continue

            if res.status_code in self.ignore_http_codes:
                continue

            # Content-Length is optional (e.g. chunked responses); without the
            # None guard, int(None) raised TypeError and killed the whole scan.
            content_length = res.headers.get('content-length')
            if (self.ignore_content_length > 0 and content_length is not None
                    and self.ignore_content_length == int(content_length)):
                continue

            # hash the page results to aid in identifying unique content
            page_hash = hashlib.sha256(res.text.encode('utf-8')).hexdigest()
            output = '[#] Found: {} (code: {}, length: {}, hash: {})'.format(
                hostname, res.status_code, content_length, page_hash)

            # print current results
            print(output)
            for key, val in res.headers.items():
                output = ' {}: {}'.format(key, val)
                print(output)

            # add url and hash into array for likely matches
            self.results.append(hostname + ',' + page_hash)

        self.completed_scan = True

    def likely_matches(self):
        """Return hostnames whose page hash is shared by at most
        ``self.unique_depth`` hosts — the most likely real virtual hosts,
        as opposed to a catch-all page served for every name.

        Returns None (after a warning) if scan() has not completed.
        """
        if self.completed_scan is False:
            print("Likely matches cannot be printed as a scan has not yet been run.")
            return

        print("\n [#] Most likely matches with a unique count of %s or less:" % self.unique_depth)

        # Map hostname -> page hash; later duplicates overwrite earlier ones,
        # matching the original dict-building behaviour.
        hash_by_host = {}
        for item in self.results:
            hostname, page_hash = item.split(",", 1)
            hash_by_host[hostname] = page_hash

        # Count how many hosts share each hash. A hash seen by only a few
        # hosts indicates genuinely distinct content. (Plain dicts replace
        # the previous pandas groupby/filter with identical output and order.)
        hash_counts = {}
        for page_hash in hash_by_host.values():
            hash_counts[page_hash] = hash_counts.get(page_hash, 0) + 1

        return [host for host, page_hash in hash_by_host.items()
                if hash_counts[page_hash] <= self.unique_depth]
0 commit comments