
Commit bdb2e64

NO-SNOW: refactor - PREVIOUS FIXES AZURE
1 parent ef5a649 commit bdb2e64


ci/github/save_mitm_requests.py

Lines changed: 181 additions & 140 deletions
@@ -13,6 +13,146 @@
 )
 logger = logging.getLogger(__name__)
 
+
+def safe_mitmproxy_call(func, fallback, description):
+    """Safely call a mitmproxy API function with fallback"""
+    try:
+        return func()
+    except Exception as e:
+        logger.warning(f"Failed to {description}: {e}")
+        return fallback
+
+
+def extract_request_data(flow):
+    """Extract request data safely from mitmproxy flow"""
+    data = {}
+
+    # Basic request info
+    data["method"] = safe_mitmproxy_call(
+        lambda: flow.request.method, "UNKNOWN", "get request method"
+    )
+
+    data["host"] = safe_mitmproxy_call(
+        lambda: flow.request.pretty_host.lower(),
+        safe_mitmproxy_call(
+            lambda: flow.request.headers.get("host", "unknown").lower(),
+            "unknown",
+            "get host from headers",
+        ),
+        "get pretty_host",
+    )
+
+    # URLs and paths
+    raw_url = safe_mitmproxy_call(
+        lambda: flow.request.pretty_url,
+        safe_mitmproxy_call(lambda: flow.request.url, "unknown", "get basic URL"),
+        "get pretty_url",
+    )
+
+    raw_path = safe_mitmproxy_call(
+        lambda: flow.request.path, "unknown", "get request path"
+    )
+
+    # Process URLs with masking
+    try:
+        data["url"] = SecretDetector.mask_secrets(safe_str(raw_url)).masked_text
+        data["path"] = SecretDetector.mask_secrets(safe_str(raw_path)).masked_text
+    except Exception as e:
+        logger.error(f"Failed to mask URL/path: {e}")
+        data["url"] = safe_str(raw_url)
+        data["path"] = safe_str(raw_path)
+
+    # Request headers and size
+    data["headers"] = safe_mitmproxy_call(
+        lambda: dict(flow.request.headers), {}, "get request headers"
+    )
+
+    data["size"] = safe_mitmproxy_call(
+        lambda: len(flow.request.content) if flow.request.content else 0,
+        0,
+        "get request size",
+    )
+
+    return data
+
+
+def extract_response_data(flow):
+    """Extract response data safely from mitmproxy flow"""
+    data = {}
+
+    # Basic response info
+    data["status_code"] = safe_mitmproxy_call(
+        lambda: flow.response.status_code, 0, "get status code"
+    )
+
+    data["reason"] = safe_mitmproxy_call(
+        lambda: flow.response.reason or "", "", "get response reason"
+    )
+
+    # Response headers and size
+    data["headers"] = safe_mitmproxy_call(
+        lambda: dict(flow.response.headers), {}, "get response headers"
+    )
+
+    data["size"] = safe_mitmproxy_call(
+        lambda: len(flow.response.content) if flow.response.content else 0,
+        0,
+        "get response size",
+    )
+
+    # Content type for debugging
+    data["content_type"] = data["headers"].get("content-type", "")
+
+    return data
+
+
+def extract_timing_data(flow):
+    """Extract timing data safely from mitmproxy flow"""
+    return safe_mitmproxy_call(
+        lambda: (
+            int((flow.response.timestamp_end - flow.request.timestamp_start) * 1000)
+            if flow.response.timestamp_end and flow.request.timestamp_start
+            else 0
+        ),
+        0,
+        "calculate duration",
+    )
+
+
+def process_headers_safely(request_headers, response_headers):
+    """Process headers with SecretDetector masking"""
+    try:
+        # Log content-encoding for debugging
+        content_encoding = response_headers.get("content-encoding", "none")
+        content_type = response_headers.get("content-type", "unknown")
+        logger.debug(
+            f"Response content-encoding: {content_encoding}, content-type: {content_type}"
+        )
+
+        masked_request = SecretDetector.mask_secrets(
+            json.dumps(request_headers, ensure_ascii=True)
+        ).masked_text
+
+        masked_response = SecretDetector.mask_secrets(
+            json.dumps(response_headers, ensure_ascii=True)
+        ).masked_text
+
+        return masked_request, masked_response
+
+    except (UnicodeDecodeError, UnicodeEncodeError) as e:
+        logger.warning(f"Header processing encoding error: {e}")
+        return (
+            f"[HEADER ENCODING ERROR: {str(e)}]",
+            f"[HEADER ENCODING ERROR: {str(e)}]",
+        )
+    except Exception as e:
+        logger.error(f"Unexpected header processing error: {e}")
+        return (
+            f"[HEADER ERROR: {type(e).__name__}]",
+            f"[HEADER ERROR: {type(e).__name__}]",
+        )
+
+
 # Import SecretDetector directly without package initialization
 secret_detector_path = (
     Path(__file__).parent
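
Aside: the fallback behaviour of the new safe_mitmproxy_call helper can be exercised outside mitmproxy. The sketch below is a minimal, hypothetical illustration, not part of the commit; the FakeRequest class is an invented stand-in for flow.request, and the helper is re-declared locally only so the snippet is self-contained. It shows that a failing attribute access logs a warning and yields the supplied fallback.

# Hypothetical illustration (not part of save_mitm_requests.py).
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)


def safe_mitmproxy_call(func, fallback, description):
    """Mirror of the helper added above: call func(), return fallback on error."""
    try:
        return func()
    except Exception as e:
        logger.warning(f"Failed to {description}: {e}")
        return fallback


class FakeRequest:
    """Hypothetical stand-in for flow.request whose pretty_host lookup fails."""

    @property
    def pretty_host(self):
        raise ValueError("host not available")


req = FakeRequest()
host = safe_mitmproxy_call(
    lambda: req.pretty_host.lower(), "unknown", "get pretty_host"
)
print(host)  # prints "unknown" after the logged warning
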
@@ -118,6 +258,20 @@ def safe_str(value, max_length=5000):
 system_info = f"{platform.system()} {platform.release()}"
 logger.info(f"Running on: {system_info}")
 
+# Log proxy environment variables for debugging
+import os
+
+proxy_vars = [
+    "HTTP_PROXY",
+    "HTTPS_PROXY",
+    "http_proxy",
+    "https_proxy",
+    "REQUESTS_CA_BUNDLE",
+]
+for var in proxy_vars:
+    value = os.environ.get(var, "NOT_SET")
+    logger.info(f"Environment {var}={value}")
+
 # Use UTF-8 with BOM for better Windows compatibility
 f = open(
     "test_requests.csv", "w", newline="", encoding="utf-8-sig", errors="replace"
@@ -161,6 +315,7 @@ def safe_str(value, max_length=5000):
 
 def response(flow):
     """Called when a response is received"""
+    # Debug logging
     try:
         debug_host = getattr(flow.request, "pretty_host", "unknown")
         debug_method = getattr(flow.request, "method", "unknown")
@@ -169,152 +324,36 @@ def response(flow):
         logger.error(f"Debug error getting basic request info: {debug_error}")
 
     try:
-        # Skip if domain should be ignored - wrap mitmproxy API call
-        try:
-            host = flow.request.pretty_host.lower()
-        except Exception as host_error:
-            logger.warning(f"Failed to get pretty_host: {host_error}")
-            try:
-                # Fallback to host header
-                host = flow.request.headers.get("host", "unknown").lower()
-            except Exception:
-                host = "unknown"
-
-        if any(ignored_domain in host for ignored_domain in IGNORE_DOMAINS):
+        # Extract all data using helper functions
+        request_data = extract_request_data(flow)
+        response_data = extract_response_data(flow)
+        duration_ms = extract_timing_data(flow)
+
+        # Skip if domain should be ignored
+        if any(
+            ignored_domain in request_data["host"] for ignored_domain in IGNORE_DOMAINS
+        ):
             return
 
-        # Calculate duration - wrap mitmproxy API calls
-        try:
-            duration_ms = (
-                int((flow.response.timestamp_end - flow.request.timestamp_start) * 1000)
-                if flow.response.timestamp_end and flow.request.timestamp_start
-                else 0
-            )
-        except Exception as duration_error:
-            logger.warning(f"Failed to calculate duration: {duration_error}")
-            duration_ms = 0
-
-        # Get request/response sizes - wrap mitmproxy API calls
-        try:
-            request_size = len(flow.request.content) if flow.request.content else 0
-        except Exception as req_size_error:
-            logger.warning(f"Failed to get request size: {req_size_error}")
-            request_size = 0
-
-        try:
-            response_size = len(flow.response.content) if flow.response.content else 0
-        except Exception as resp_size_error:
-            logger.warning(f"Failed to get response size: {resp_size_error}")
-            response_size = 0
-
-        # Convert headers to JSON strings and mask secrets (with proper encoding)
-        # Wrap each mitmproxy API call separately
-        try:
-            request_headers_dict = dict(flow.request.headers)
-        except Exception as req_header_error:
-            logger.warning(f"Failed to get request headers: {req_header_error}")
-            request_headers_dict = {}
-
-        try:
-            response_headers_dict = dict(flow.response.headers)
-        except Exception as resp_header_error:
-            logger.warning(f"Failed to get response headers: {resp_header_error}")
-            response_headers_dict = {}
-
-        # Process headers safely
-        try:
-            # Log content-encoding and content-type for debugging
-            content_encoding = response_headers_dict.get("content-encoding", "none")
-            content_type = response_headers_dict.get("content-type", "unknown")
-            logger.debug(
-                f"Response content-encoding: {content_encoding}, content-type: {content_type}"
-            )
-
-            request_headers = SecretDetector.mask_secrets(
-                json.dumps(request_headers_dict, ensure_ascii=True)
-            ).masked_text
-            response_headers = SecretDetector.mask_secrets(
-                json.dumps(response_headers_dict, ensure_ascii=True)
-            ).masked_text
-        except (UnicodeDecodeError, UnicodeEncodeError) as e:
-            logger.warning(f"Header processing encoding error: {e}")
-            request_headers = f"[HEADER ENCODING ERROR: {str(e)}]"
-            response_headers = f"[HEADER ENCODING ERROR: {str(e)}]"
-        except Exception as e:
-            logger.error(f"Unexpected header processing error: {e}")
-            request_headers = f"[HEADER ERROR: {type(e).__name__}]"
-            response_headers = f"[HEADER ERROR: {type(e).__name__}]"
-
-        # Extract key info and mask sensitive data (with proper encoding)
-        timestamp = datetime.now().isoformat()
-
-        # Get method safely
-        try:
-            method = flow.request.method
-        except Exception as method_error:
-            logger.warning(f"Failed to get request method: {method_error}")
-            method = "UNKNOWN"
-
-        # Get URL and path safely - wrap mitmproxy API calls
-        try:
-            raw_url = flow.request.pretty_url
-        except Exception as url_error:
-            logger.warning(f"Failed to get pretty_url: {url_error}")
-            try:
-                raw_url = flow.request.url
-            except Exception:
-                raw_url = "unknown"
-
-        try:
-            raw_path = flow.request.path
-        except Exception as path_error:
-            logger.warning(f"Failed to get path: {path_error}")
-            raw_path = "unknown"
-
-        # Process URL and path with masking
-        try:
-            logger.debug(
-                f"Raw URL type: {type(raw_url)}, Raw path type: {type(raw_path)}"
-            )
-            url = SecretDetector.mask_secrets(safe_str(raw_url)).masked_text
-            path = SecretDetector.mask_secrets(safe_str(raw_path)).masked_text
-        except Exception as e:
-            logger.error(f"Failed to mask URL/path: {e}")
-            url = safe_str(raw_url)
-            path = safe_str(raw_path)
-
-        # Get response properties safely
-        try:
-            status_code = flow.response.status_code
-        except Exception as status_error:
-            logger.warning(f"Failed to get status_code: {status_error}")
-            status_code = 0
-
-        try:
-            reason = flow.response.reason or ""
-        except Exception as reason_error:
-            logger.warning(f"Failed to get reason: {reason_error}")
-            reason = ""
-
-        try:
-            content_type = flow.response.headers.get("content-type", "")
-        except Exception as content_type_error:
-            logger.warning(f"Failed to get content-type: {content_type_error}")
-            content_type = ""
+        # Process headers with secret masking
+        request_headers, response_headers = process_headers_safely(
+            request_data["headers"], response_data["headers"]
+        )
 
         # Write row to CSV with safe string conversion
+        timestamp = datetime.now().isoformat()
         writer.writerow(
             [
                 safe_str(timestamp),
-                safe_str(method),
-                safe_str(url),
-                safe_str(host),
-                safe_str(path),
-                safe_str(status_code),
-                safe_str(reason),
-                safe_str(request_size),
-                safe_str(response_size),
-                safe_str(content_type),
+                safe_str(request_data["method"]),
+                safe_str(request_data["url"]),
+                safe_str(request_data["host"]),
+                safe_str(request_data["path"]),
+                safe_str(response_data["status_code"]),
+                safe_str(response_data["reason"]),
+                safe_str(request_data["size"]),
+                safe_str(response_data["size"]),
+                safe_str(response_data["content_type"]),
                 safe_str(duration_ms),
                 safe_str(request_headers),
                 safe_str(response_headers),
@@ -324,7 +363,9 @@ def response(flow):
 
         try:
             f.flush()  # Ensure it's written immediately
-            logger.debug(f"Successfully wrote {method} {host}")
+            logger.debug(
+                f"Successfully wrote {request_data['method']} {request_data['host']}"
+            )
         except Exception as flush_error:
             logger.error(f"Flush error: {flush_error}")
 
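
For readers tracing the CSV output, the refactored writer.writerow call above keeps the original column order. The sketch below restates that order with the source of each field; it is illustrative only, the column labels are hypothetical names chosen here, and the actual header row is written elsewhere in the script and not shown in this diff.

# Column order produced by the refactored writer.writerow(...) call above.
# (Illustrative only; labels are hypothetical, header writing is not part of this hunk.)
CSV_COLUMNS = [
    "timestamp",
    "method",            # request_data["method"]
    "url",               # request_data["url"] (secret-masked)
    "host",              # request_data["host"]
    "path",              # request_data["path"] (secret-masked)
    "status_code",       # response_data["status_code"]
    "reason",            # response_data["reason"]
    "request_size",      # request_data["size"]
    "response_size",     # response_data["size"]
    "content_type",      # response_data["content_type"]
    "duration_ms",       # extract_timing_data(flow)
    "request_headers",   # masked via process_headers_safely
    "response_headers",  # masked via process_headers_safely
]
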
