# NOTE: removed web-scrape residue (GitHub page chrome and line-number gutter)
import re
import orjson
import tempfile
import subprocess
from pathlib import Path
from http.cookies import SimpleCookie
from bbot.modules.base import BaseModule
class httpx(BaseModule):
    """Probe web targets with ProjectDiscovery's httpx binary.

    Consumes open ports and (un)verified URLs, feeds them to the httpx CLI in
    large batches, and for every live web resource emits a URL event plus a
    full HTTP_RESPONSE event. Many other bbot modules rely on the
    HTTP_RESPONSE events produced here.
    """

    watched_events = ["OPEN_TCP_PORT", "URL_UNVERIFIED", "URL"]
    produced_events = ["URL", "HTTP_RESPONSE"]
    flags = ["active", "safe", "web-basic", "social-enum", "subdomain-enum", "cloud-enum"]
    meta = {
        "description": "Visit webpages. Many other modules rely on httpx",
        "created_date": "2022-07-08",
        "author": "@TheTechromancer",
    }
    options = {
        "threads": 50,
        "in_scope_only": True,
        "version": "1.2.5",
        "max_response_size": 5242880,
        "store_responses": False,
        "probe_all_ips": False,
    }
    options_desc = {
        "threads": "Number of httpx threads to use",
        # fixed garbled wording: "web reparents" -> "web resources"
        "in_scope_only": "Only visit web resources that are in scope.",
        "version": "httpx version",
        "max_response_size": "Max response size in bytes",
        "store_responses": "Save raw HTTP responses to scan folder",
        "probe_all_ips": "Probe all the ips associated with same host",
    }
    deps_ansible = [
        {
            "name": "Download httpx",
            "unarchive": {
                "src": "https://github.com/projectdiscovery/httpx/releases/download/v#{BBOT_MODULES_HTTPX_VERSION}/httpx_#{BBOT_MODULES_HTTPX_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH_GOLANG}.zip",
                "include": "httpx",
                "dest": "#{BBOT_TOOLS}",
                "remote_src": True,
            },
        }
    ]

    # accept events up to 2 hops outside the main scope (filter_event narrows further)
    scope_distance_modifier = 2
    _shuffle_incoming_queue = False
    # events are fed to the httpx CLI in batches of this size
    _batch_size = 500
    _priority = 2
    # accept Javascript URLs
    accept_url_special = True

    async def setup(self):
        """Cache config values and precompile the temp-dir cleanup regex."""
        self.threads = self.config.get("threads", 50)
        self.max_response_size = self.config.get("max_response_size", 5242880)
        self.store_responses = self.config.get("store_responses", False)
        self.probe_all_ips = self.config.get("probe_all_ips", False)
        # matches leftover "httpx<digits>" temp dirs created by the httpx binary
        self.httpx_tempdir_regex = re.compile(r"^httpx\d+$")
        return True

    async def filter_event(self, event):
        """Decide whether an incoming event should be probed.

        Returns True to accept, or a (False, reason) tuple to reject.
        """
        if "_wildcard" in str(event.host).split("."):
            return False, "event is wildcard"
        if "unresolved" in event.tags:
            return False, "event is unresolved"
        # never re-visit our own output
        if event.module == self:
            return False, "event is from self"
        if "spider-max" in event.tags:
            return False, "event exceeds spidering limits"
        # scope filtering
        in_scope_only = self.config.get("in_scope_only", True)
        # "httpx-safe" tag bypasses the scope check entirely
        if "httpx-safe" in event.tags:
            return True
        max_scope_distance = 0 if in_scope_only else (self.scan.scope_search_distance + 1)
        if event.scope_distance > max_scope_distance:
            return False, "event is not in scope"
        return True

    def make_url_metadata(self, event):
        """Build the probe URL and a dedup hash for *event*.

        URL events with a bare "/" path — and all non-URL events — hash by
        (host, port), so multiple events pointing at the same web server
        collapse into one probe; URLs with a deeper path hash individually.
        """
        has_spider_max = "spider-max" in event.tags
        url_hash = None
        if event.type.startswith("URL"):
            # we NEED the port, otherwise httpx will try HTTPS even for HTTP URLs
            url = event.with_port().geturl()
            if event.parsed_url.path == "/":
                url_hash = hash((event.host, event.port, has_spider_max))
        else:
            url = str(event.data)
            url_hash = hash((event.host, event.port, has_spider_max))
        if url_hash is None:
            url_hash = hash((url, has_spider_max))
        return url, url_hash

    def _incoming_dedup_hash(self, event):
        """Dedupe incoming events using the hash from make_url_metadata()."""
        url, url_hash = self.make_url_metadata(event)
        return url_hash

    async def handle_batch(self, *events):
        """Run the httpx CLI over a batch of events and emit the results."""
        # map probe URL -> originating event so JSON results can be correlated
        stdin = {}
        for event in events:
            url, url_hash = self.make_url_metadata(event)
            stdin[url] = event
        if not stdin:
            return

        command = [
            "httpx",
            "-silent",
            "-json",
            "-include-response",
            "-threads",
            self.threads,
            "-timeout",
            self.scan.httpx_timeout,
            "-retries",
            self.scan.httpx_retries,
            "-header",
            f"User-Agent: {self.scan.useragent}",
            "-response-size-to-read",
            f"{self.max_response_size}",
        ]
        if self.store_responses:
            response_dir = self.scan.home / "httpx"
            self.helpers.mkdir(response_dir)
            command += ["-srd", str(response_dir)]
        dns_resolvers = ",".join(self.helpers.system_resolvers)
        if dns_resolvers:
            command += ["-r", dns_resolvers]
        if self.probe_all_ips:
            command += ["-probe-all-ips"]
        # Add custom HTTP headers
        for hk, hv in self.scan.custom_http_headers.items():
            command += ["-header", f"{hk}: {hv}"]
        # Add custom HTTP cookies as a single header
        if self.scan.custom_http_cookies:
            cookie = SimpleCookie()
            for ck, cv in self.scan.custom_http_cookies.items():
                cookie[ck] = cv
            # Build the cookie header
            cookie_header = f"Cookie: {cookie.output(header='', sep='; ').strip()}"
            command += ["-header", cookie_header]
        proxy = self.scan.http_proxy
        if proxy:
            command += ["-http-proxy", proxy]

        async for line in self.run_process_live(command, text=False, input=list(stdin), stderr=subprocess.DEVNULL):
            try:
                # decode in an executor so large JSON lines don't block the event loop
                j = await self.helpers.run_in_executor(orjson.loads, line)
            except orjson.JSONDecodeError:
                self.warning(f"httpx failed to decode line: {line}")
                continue

            url = j.get("url", "")
            status_code = int(j.get("status_code", 0))
            if status_code == 0:
                self.debug(f'No HTTP status code for "{url}"')
                continue
            # httpx echoes our input URL back in the "input" field
            parent_event = stdin.get(j.get("input", ""), None)
            if parent_event is None:
                self.warning(f"Unable to correlate parent event from: {line}")
                continue

            # discard 404s from unverified URLs
            path = j.get("path", "/")
            if parent_event.type == "URL_UNVERIFIED" and status_code in (404,) and path != "/":
                self.debug(f'Discarding 404 from "{url}"')
                continue

            # discard 4xx responses that contain WAF strings
            if 400 <= status_code < 500:
                body = j.get("body", "")
                if any(ws in body for ws in self.helpers.get_waf_strings()):
                    self.debug(f'Discarding WAF {status_code} from "{url}"')
                    continue

            # main URL
            tags = [f"status-{status_code}"]
            httpx_ip = j.get("host", "")
            if httpx_ip:
                tags.append(f"ip-{httpx_ip}")
            # grab title
            title = self.helpers.tagify(j.get("title", ""), maxlen=30)
            if title:
                tags.append(f"http-title-{title}")

            # literal braces: this is a template filled in later, not an f-string
            url_context = "{module} visited {event.parent.data} and got status code {event.http_status}"
            if parent_event.type == "OPEN_TCP_PORT":
                url_context += " at {event.data}"
            url_event = self.make_event(
                url,
                "URL",
                parent_event,
                tags=tags,
                context=url_context,
            )
            if url_event:
                if url_event != parent_event:
                    await self.emit_event(url_event)
                # HTTP response
                content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0]
                content_length = j.get("content_length", 0)
                content_length = self.helpers.bytes_to_human(content_length)
                await self.emit_event(
                    j,
                    "HTTP_RESPONSE",
                    url_event,
                    tags=url_event.tags,
                    context=f"HTTP_RESPONSE was {content_length} with {content_type} content type",
                )

        # remove leftover "httpx<digits>" temp dirs created by the binary
        for tempdir in Path(tempfile.gettempdir()).iterdir():
            if tempdir.is_dir() and self.httpx_tempdir_regex.match(tempdir.name):
                self.helpers.rm_rf(tempdir)

    async def cleanup(self):
        """Delete httpx's resume.cfg if one was left behind."""
        resume_file = self.helpers.current_dir / "resume.cfg"
        resume_file.unlink(missing_ok=True)