Skip to content

Commit c3cf074

Browse files
NO-SNOW: deal with error unicode etc
1 parent 65a2825 commit c3cf074

File tree

1 file changed

+96
-56
lines changed

1 file changed

+96
-56
lines changed

ci/github/save_mitm_requests.py

Lines changed: 96 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -28,9 +28,27 @@
2828
"example.com", # Test domain from setup
2929
}
3030

31-
# Open CSV file for writing requests
32-
f = open("test_requests.csv", "w", newline="", encoding="utf-8")
33-
writer = csv.writer(f)
31+
32+
# Single-pass translation table: drop NUL characters and render CR/LF as the
# visible two-character escapes "\r" / "\n" so each record stays on one line.
_CSV_UNSAFE_CHARS = str.maketrans({"\x00": None, "\r": "\\r", "\n": "\\n"})


def clean_for_csv(value):
    """Return *value* as a single-line string that is safe to write to CSV.

    Args:
        value: Any object. ``None`` becomes the empty string. ``bytes`` and
            ``bytearray`` are decoded as UTF-8 (undecodable bytes become
            U+FFFD) instead of being rendered as a ``b'...'`` repr. Anything
            else is converted with ``str()``.

    Returns:
        The cleaned text: NUL characters removed, carriage returns and
        newlines replaced by the literal escapes ``\\r`` and ``\\n``. If the
        conversion itself raises a Unicode error, the placeholder
        ``"[ENCODING ERROR]"`` is returned instead.
    """
    if value is None:
        return ""

    try:
        if isinstance(value, (bytes, bytearray)):
            # str(b"...") would yield the Python repr ("b'...'"), not the
            # text; decode explicitly and never fail on bad byte sequences.
            text = bytes(value).decode("utf-8", errors="replace")
        else:
            text = str(value)
        return text.translate(_CSV_UNSAFE_CHARS)
    except (UnicodeDecodeError, UnicodeEncodeError):
        # A custom __str__ may still raise; keep the row writable.
        return "[ENCODING ERROR]"
47+
48+
49+
# Open CSV file for writing requests with proper encoding and quoting
50+
f = open("test_requests.csv", "w", newline="", encoding="utf-8", errors="replace")
51+
writer = csv.writer(f, quoting=csv.QUOTE_ALL)
3452

3553
# Write CSV header
3654
writer.writerow(
@@ -48,6 +66,7 @@
4866
"duration_ms",
4967
"request_headers",
5068
"response_headers",
69+
"error_message",
5170
]
5271
)
5372

@@ -71,73 +90,94 @@ def response(flow):
7190
request_size = len(flow.request.content) if flow.request.content else 0
7291
response_size = len(flow.response.content) if flow.response.content else 0
7392

74-
# Convert headers to JSON strings and mask secrets
75-
request_headers_dict = dict(flow.request.headers)
76-
response_headers_dict = dict(flow.response.headers)
77-
78-
request_headers = SecretDetector.mask_secrets(
79-
json.dumps(request_headers_dict)
80-
).masked_text
81-
response_headers = SecretDetector.mask_secrets(
82-
json.dumps(response_headers_dict)
83-
).masked_text
84-
85-
# Extract key info and mask sensitive data
93+
# Convert headers to JSON strings and mask secrets (with proper encoding)
94+
try:
95+
request_headers_dict = dict(flow.request.headers)
96+
response_headers_dict = dict(flow.response.headers)
97+
98+
request_headers = SecretDetector.mask_secrets(
99+
json.dumps(request_headers_dict, ensure_ascii=True)
100+
).masked_text
101+
response_headers = SecretDetector.mask_secrets(
102+
json.dumps(response_headers_dict, ensure_ascii=True)
103+
).masked_text
104+
except (UnicodeDecodeError, UnicodeEncodeError) as e:
105+
request_headers = f"[ENCODING ERROR: {str(e)}]"
106+
response_headers = f"[ENCODING ERROR: {str(e)}]"
107+
108+
# Extract key info and mask sensitive data (with proper encoding)
86109
timestamp = datetime.now().isoformat()
87110
method = flow.request.method
88-
url = SecretDetector.mask_secrets(flow.request.pretty_url).masked_text
89-
path = SecretDetector.mask_secrets(flow.request.path).masked_text
111+
112+
try:
113+
url = SecretDetector.mask_secrets(flow.request.pretty_url).masked_text
114+
path = SecretDetector.mask_secrets(flow.request.path).masked_text
115+
except (UnicodeDecodeError, UnicodeEncodeError) as e:
116+
url = f"[ENCODING ERROR: {str(e)}]"
117+
path = f"[ENCODING ERROR: {str(e)}]"
118+
90119
status_code = flow.response.status_code
91-
reason = flow.response.reason
120+
reason = flow.response.reason or ""
92121
content_type = flow.response.headers.get("content-type", "")
93122

94-
# Write row to CSV
123+
# Write row to CSV with cleaned data
95124
writer.writerow(
96125
[
97-
timestamp,
98-
method,
99-
url,
100-
host,
101-
path,
102-
status_code,
103-
reason,
104-
request_size,
105-
response_size,
106-
content_type,
107-
duration_ms,
108-
request_headers,
109-
response_headers,
126+
clean_for_csv(timestamp),
127+
clean_for_csv(method),
128+
clean_for_csv(url),
129+
clean_for_csv(host),
130+
clean_for_csv(path),
131+
clean_for_csv(status_code),
132+
clean_for_csv(reason),
133+
clean_for_csv(request_size),
134+
clean_for_csv(response_size),
135+
clean_for_csv(content_type),
136+
clean_for_csv(duration_ms),
137+
clean_for_csv(request_headers),
138+
clean_for_csv(response_headers),
139+
"", # No error for successful requests
110140
]
111141
)
112142

113143
f.flush() # Ensure it's written immediately
114144

115145
except Exception as e:
116146
# Write error row (only for non-ignored domains)
117-
if "host" in locals():
118-
host_check = locals()["host"]
119-
else:
120-
host_check = getattr(flow.request, "pretty_host", "").lower()
121-
122-
if not any(ignored_domain in host_check for ignored_domain in IGNORE_DOMAINS):
123-
writer.writerow(
124-
[
125-
datetime.now().isoformat(),
126-
"ERROR",
127-
SecretDetector.mask_secrets(str(e)).masked_text,
128-
"",
129-
"",
130-
"",
131-
"",
132-
"",
133-
"",
134-
"",
135-
"",
136-
"",
137-
"",
138-
]
139-
)
140-
f.flush()
147+
try:
148+
error_host = getattr(flow.request, "pretty_host", "")
149+
error_method = getattr(flow.request, "method", "")
150+
except Exception:
151+
error_host = ""
152+
error_method = ""
153+
154+
# Check if we should ignore this domain
155+
if error_host and any(
156+
ignored_domain in error_host.lower() for ignored_domain in IGNORE_DOMAINS
157+
):
158+
return
159+
160+
writer.writerow(
161+
[
162+
clean_for_csv(datetime.now().isoformat()),
163+
clean_for_csv(error_method),
164+
"", # Empty URL for errors
165+
clean_for_csv(error_host),
166+
"", # Empty path for errors
167+
"", # Empty status code for errors
168+
"", # Empty reason for errors
169+
"", # Empty request size for errors
170+
"", # Empty response size for errors
171+
"", # Empty content type for errors
172+
"", # Empty duration for errors
173+
"", # Empty request headers for errors
174+
"", # Empty response headers for errors
175+
clean_for_csv(
176+
SecretDetector.mask_secrets(str(e)).masked_text
177+
), # Error message
178+
]
179+
)
180+
f.flush()
141181

142182

143183
def done():

0 commit comments

Comments
 (0)