-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdeep-web-scanner.py
More file actions
241 lines (204 loc) · 8.54 KB
/
deep-web-scanner.py
File metadata and controls
241 lines (204 loc) · 8.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import argparse
import bs4
import os
import colorama
from concurrent.futures import ThreadPoolExecutor
import threading
import ipaddress
import socket
import time
import random
from typing import Optional, Union
import requests
# Silence urllib3's InsecureRequestWarning; every request below uses verify=False.
requests.packages.urllib3.disable_warnings() # type: ignore
# autoreset=True restores the default terminal color after each print.
colorama.init(autoreset=True)
# Directory containing this script (not referenced elsewhere in this file).
folder = os.path.dirname(__file__)
# Dedup cache: banner strings already reported, to avoid duplicate output.
output_strings: list[str] = []
# Candidate webserver ports probed on every host.
ports = [80, 443, 8080, 8081, 4434, 3443, 8443, 3000, 4000, 5000]
# Thread-pool size for the TCP port checks (20 threads per port).
workers = len(ports) * 20
# Only banners containing one of these keywords are echoed to the terminal.
keywords = ["cam", "rasp", " hp ", "system", "index of", "dashboard"]
# Write buffer and timestamp of the last flush (managed by write_line).
output_tmp = ""
last_write = time.time()
# Guards the buffered file writes in write_line.
global_lock = threading.Lock()
# Open ip/port pairs queued by start_portcheck for a later full HTTP probe.
banner_targets: list[dict[str, Union[str, int]]] = []
def main():
    """Scan every IP range listed in the input file for webservers.

    Each input line is either a dashed range ("2.56.20.0-2.56.23.255")
    or CIDR notation ("2.56.20.0/22").  Open ports found by
    start_portcheck are queued in the global banner_targets list, then
    probed with full HTTP requests; matching banners are printed and
    all results are buffered to the output file via write_line.
    """
    print("----------------------------")
    print(" Deep Web Scanner! ")
    print("----------------------------\n")
    print("Every active webserver url will be logged in the output file.")
    print("This terminal will only show urls/metadata with the following keywords: " + ", ".join(keywords))
    if indexof.lower() == "true":
        print("'Index of /' filenames will be logged!")
    print("Scan will start...")
    with open(input_file, "r") as myfile:
        content = myfile.readlines()
    # Shuffle the scan order only when -randomize is "true".
    # BUG FIX: the original truth-tested the raw argparse string, which is
    # always non-empty and therefore always truthy, so "-randomize false"
    # still shuffled.  str(...) keeps this working even if a future caller
    # passes a real bool.
    if str(randomize).lower() == "true":
        random.shuffle(content)
    for line in content:
        if "-" in line:
            # split ip range 2.56.20.0-2.56.23.255
            ip_range_array = line.split("-")
            ip_range_start = ip_range_array[0].strip()
            ip_range_end = ip_range_array[1].strip()
            print(
                f"Start scan from range: {ip_range_start} - {ip_range_end}")
            current_ip = ipaddress.IPv4Address(ip_range_start)
            end_ip = ipaddress.IPv4Address(ip_range_end)
            with ThreadPoolExecutor(max_workers=workers) as executor_portcheck:
                # BUG FIX: <= so the final address of the range is scanned
                # too; the original's strict < silently skipped it.
                while current_ip <= end_ip:
                    executor_portcheck.submit(
                        start_portcheck, current_ip.exploded)
                    current_ip += 1
        elif "/" in line:
            # CIDR notation, e.g. 2.56.20.0/22
            ip_range = ipaddress.ip_network(line.strip())
            with ThreadPoolExecutor(max_workers=workers) as executor_portcheck:
                for ip in ip_range.hosts():
                    executor_portcheck.submit(start_portcheck, ip.exploded)
        else:
            print(
                "No valid input file! Should be something like 2.56.20.0-2.56.23.255 per line!")
    global banner_targets
    print(f"{len(banner_targets)} responses")
    # Full HTTP probes are slower, so use a smaller dedicated pool.
    with ThreadPoolExecutor(max_workers=10) as executor_getbanner:
        for target in banner_targets:
            executor_getbanner.submit(
                start_request, target["ip"], target["port"])  # type: ignore
    banner_targets.clear()
    # Force-flush whatever is still buffered to the output file.
    write_line("", True)
def start_portcheck(ip: str) -> None:
    """TCP-connect check of every candidate webserver port on *ip*.

    Each port that accepts a connection within 2 seconds is queued in
    the global banner_targets list for a later full HTTP request.
    """
    global banner_targets
    for candidate_port in ports:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.settimeout(2)
            # connect_ex returns 0 on a successful TCP handshake
            if sock.connect_ex((ip, candidate_port)) == 0:
                banner_targets.append({"ip": ip, "port": candidate_port})
        finally:
            sock.close()
def start_request(ip: str, port: int) -> None:
    """Probe one open ip:port with a real HTTP(S) request and grab its banner.

    Any exception (connection refused, TLS failure, timeout, ...) is
    caught and logged so one bad host never kills the worker thread.
    """
    try:
        # Ports whose decimal form contains "80" (80, 8080, 8081, ...)
        # are assumed to speak plain HTTP; everything else is tried
        # over HTTPS.  Same heuristic as the original, written with the
        # "in" operator instead of the __contains__ dunder call.
        scheme = "http" if "80" in str(port) else "https"
        url = f"{scheme}://{ip}:{port}"
        site_result = request_url(url)
        # request_url returns False for unusable content types, otherwise
        # a (response, soup) tuple — the isinstance double-check in the
        # original was redundant.
        if site_result is not False:
            get_banner(site_result[0], site_result[1])
    except Exception as e:
        print(f"IP: {ip}, Port: {str(port)}")
        print(e)
def request_url(url: str) -> Union[tuple[requests.Response, bs4.BeautifulSoup], bool]:
    """HEAD-check *url* for a text-like content type, then GET it.

    Returns (response, soup) on success, or False when the server sends
    no content-type header or a non-text one (images, downloads, ...).
    Network errors propagate to the caller (start_request handles them).
    """
    # BUG FIX: the original leaked the session on both early `return
    # False` paths; the context manager guarantees it is closed.
    with requests.session() as session:
        session.headers[
            "User-Agent"
        ] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36"
        # Cheap HEAD request first so binary content is rejected before
        # downloading the full body.
        header = session.head(url=url, timeout=5, verify=False)
        content_type_header = header.headers.get("content-type")
        if content_type_header is None:
            return False
        if not any(allowed in content_type_header.lower()
                   for allowed in ("html", "plain", "text", "json")):
            return False
        response = session.get(url=url, timeout=10, verify=False)
    try:
        soup = bs4.BeautifulSoup(response.text, "html.parser")
        return (response, soup)
    except Exception:
        # Unparseable body: still return the response with an empty soup.
        soup = bs4.BeautifulSoup("", "html.parser")
        return (response, soup)
def get_banner(request: requests.Response, soup: bs4.BeautifulSoup):
    """Extract banner information from a response, print keyword matches,
    and buffer every new banner line to the output file.

    Collected fields: url, Server header, <title>, generator meta tags,
    a "login required" marker for password fields, "Index of" listing
    entries (when enabled), and the content size.
    """
    banner_array: list[str] = []
    banner_array.append(request.url)
    if soup:
        server_header = request.headers.get("Server")
        if isinstance(server_header, str):
            banner_array.append(server_header)
        title = soup.find("title")
        if isinstance(title, bs4.Tag):
            banner_array.append(title.get_text().strip().replace("\n", ""))
        meta_tags: bs4.element.ResultSet[bs4.Tag] = soup.find_all(
            "meta", attrs={"name": "generator"})
        for meta_tag in meta_tags:
            # BUG FIX: the original read `meta_tag.attr`, which bs4 treats
            # as a child-tag lookup and which therefore never yields a Tag
            # attribute dict — generator names were silently dropped.
            # Tag.attrs is the real attribute dictionary.
            generator = meta_tag.attrs.get("content")
            if isinstance(generator, str):
                banner_array.append(generator)
        # has this site a password field?
        password_fields = soup.find_all(attrs={"type": "password"})
        if len(password_fields) > 0:
            banner_array.append("login required")
        # check for "index of" websites and show root files/folders
        global indexof
        if indexof.lower() == "true" and "index of" in request.text.lower():
            a_array: list[bs4.Tag] = soup.find_all("a")
            for a in a_array:
                href = a.attrs.get("href")
                # skip sort links such as "?C=N;O=D"
                if isinstance(href, str) and href.find("?") != 0:
                    banner_array.append(href)
    banner_array.append(f"{str(len(request.content))} content size")
    fullstring = ", ".join(banner_array)
    if fullstring not in output_strings:
        output_strings.append(fullstring)
        for keyword in keywords:
            if keyword in fullstring.lower():
                if "login required" in fullstring:
                    print(colorama.Fore.RED + fullstring)
                elif "Index of /" in fullstring:
                    print(colorama.Fore.YELLOW + fullstring)
                else:
                    print(colorama.Fore.GREEN + fullstring)
                # BUG FIX: stop after the first match so a banner hitting
                # several keywords is not printed several times.
                break
        write_line(fullstring)
def write_line(line: str, force: Optional[bool] = False):
    """Buffer *line* and flush the buffer to the output file.

    Lines accumulate in the module-level ``output_tmp`` buffer and are
    written out at most every 30 seconds — or immediately when *force*
    is true — to limit disk I/O from many worker threads.
    """
    global output_tmp, last_write
    # BUG FIX: the original spun on `while global_lock.locked(): continue`
    # before acquiring — a pointless busy-wait (acquire() already blocks)
    # that also left the buffer append unprotected, so concurrent threads
    # could interleave read-modify-writes of output_tmp.  Holding the lock
    # for the whole operation makes the buffer update and flush atomic.
    with global_lock:
        output_tmp += line + "\n"
        if last_write + 30 < time.time() or force:
            last_write = time.time()
            lines_to_write = output_tmp.count("\n")
            with open(output_file, "a") as output_1:
                output_1.write(output_tmp)
            output_tmp = ""
            if lines_to_write > 1:
                print(f"{lines_to_write} webservers found and written to file")
            else:
                print(f"{lines_to_write} webserver found and written to file")
if __name__ == "__main__":
    # Command-line interface.  The boolean-ish options are plain strings
    # ("true"/"false") because the rest of the script compares them with
    # .lower() == "true".
    arg_parser = argparse.ArgumentParser(
        description="Check if domain has an active website and grab banner."
    )
    arg_parser.add_argument(
        "-i", type=str, default="./asn-country-ipv4.csv", help="Path to input file"
    )
    arg_parser.add_argument(
        "-o", type=str, default="./deep-web.txt", help="Path to output file"
    )
    arg_parser.add_argument(
        "-indexof", type=str, default="true", help="Show files from index of sites"
    )
    arg_parser.add_argument(
        "-randomize", type=str, default="true", help="Randomize input list entries"
    )
    cli_args = arg_parser.parse_args()
    # Module-level globals read throughout the script.
    input_file = cli_args.i
    output_file = cli_args.o
    indexof = cli_args.indexof
    randomize = cli_args.randomize
    main()