Commit fb982f0

SimpleChatTC:SimpleProxy: debug dumps to identify funny Bing behaviour
Bing raised a challenge for Chrome-triggered search requests after a few requests, even though they were spread a few minutes apart, while still seemingly allowing wget-based searches to continue (again spread a few minutes apart). Added a simple helper to trace this; use --debug True to enable it.
1 parent 33f35ca
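Quick usage sketch (the invocation below just assumes the defaults visible in the diff; adjust the script path and port to taste):

    # Start the proxy with debug dumps enabled. Each traced request then
    # leaves a timestamped pair of files, as per debug_dump in the diff below:
    #   /tmp/simpleproxy.<timestamp>.meta  (url, request headers, content type)
    #   /tmp/simpleproxy.<timestamp>.data  (fetched body / extracted text)
    python3 tools/server/public_simplechat/local.tools/simpleproxy.py --port 3128 --debug True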

2 files changed: 22 additions, 1 deletion

tools/server/public_simplechat/local.tools/simpleproxy.json

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
         "^search\\.yahoo\\.com$",
         ".*\\.brave\\.com$",
         "^search\\.brave\\.com$",
+        "^brave\\.com$",
         ".*\\.duckduckgo\\.com$",
         "^duckduckgo\\.com$",
         ".*\\.google\\.com$",
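
Side note on the added entry: the existing subdomain pattern ".*\\.brave\\.com$" does not match the bare host, which is presumably why "^brave\\.com$" is needed alongside it. A minimal check, assuming the config entries are applied as Python regexes in the way validate_url's re based filtering suggests:

    import re

    # The subdomain pattern matches hosts like search.brave.com ...
    assert re.match(r".*\.brave\.com$", "search.brave.com")
    # ... but not the bare domain, hence the separate anchored entry.
    assert not re.match(r".*\.brave\.com$", "brave.com")
    assert re.match(r"^brave\.com$", "brave.com")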

tools/server/public_simplechat/local.tools/simpleproxy.py

Lines changed: 21 additions & 1 deletion
@@ -22,11 +22,13 @@
 from dataclasses import dataclass
 import html.parser
 import re
+import time


 gMe = {
     '--port': 3128,
     '--config': '/dev/null',
+    '--debug': False,
     'server': None
 }

@@ -105,6 +107,18 @@ class UrlReqResp:
     contentData: str = ""


+def debug_dump(meta: dict, data: dict):
+    if not gMe['--debug']:
+        return
+    timeTag = f"{time.time():0.12f}"
+    with open(f"/tmp/simpleproxy.{timeTag}.meta", '+w') as f:
+        for k in meta:
+            f.write(f"\n\n\n\n{k}:{meta[k]}\n\n\n\n")
+    with open(f"/tmp/simpleproxy.{timeTag}.data", '+w') as f:
+        for k in data:
+            f.write(f"\n\n\n\n{k}:{data[k]}\n\n\n\n")
+
+
 def validate_url(url: str, tag: str):
     """
     Implement a re based filter logic on the specified url.
@@ -152,7 +166,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         return gotVU
     try:
         hUA = ph.headers.get('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0')
-        hAL = ph.headers.get('Accept-Language', "en-US,en")
+        hAL = ph.headers.get('Accept-Language', "en-US,en;q=0.9")
         hA = ph.headers.get('Accept', "text/html,*/*")
         headers = {
             'User-Agent': hUA,
@@ -166,6 +180,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
         contentData = response.read().decode('utf-8')
         statusCode = response.status or 200
         contentType = response.getheader('Content-Type') or 'text/html'
+        debug_dump({ 'url': req.full_url, 'headers': req.headers, 'ctype': contentType }, { 'cdata': contentData })
         return UrlReqResp(True, statusCode, "", contentType, contentData)
     except Exception as exc:
         return UrlReqResp(False, 502, f"WARN:{tag}:Failed:{exc}")
@@ -283,6 +298,7 @@ def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
         ph.send_header('Access-Control-Allow-Origin', '*')
         ph.end_headers()
         ph.wfile.write(textHtml.get_stripped_text().encode('utf-8'))
+        debug_dump({ 'RawText': 'yes', 'StrippedText': 'yes' }, { 'RawText': textHtml.text, 'StrippedText': textHtml.get_stripped_text() })
     except Exception as exc:
         ph.send_error(502, f"WARN:UrlTextFailed:{exc}")

@@ -336,6 +352,10 @@ def process_args(args: list[str]):
                 iArg += 1
                 gMe[cArg] = ast.literal_eval(args[iArg])
                 iArg += 1
+            case '--debug':
+                iArg += 1
+                gMe[cArg] = ast.literal_eval(args[iArg])
+                iArg += 1
             case _:
                 gMe['INTERNAL.ProcessArgs.Unknown'].append(cArg)
                 print(f"WARN:ProcessArgs:{iArg}:IgnoringUnknownCommand:{cArg}")
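
To compare the dumped Chrome-triggered and wget-triggered flows side by side, a throwaway reader along these lines can help (hypothetical helper, not part of this commit); it just drops the blank-line padding that debug_dump writes around each key:value entry:

    import glob

    # Hypothetical helper, not part of this commit: print each dumped
    # .meta file, skipping the padding newlines around the entries.
    for fname in sorted(glob.glob('/tmp/simpleproxy.*.meta')):
        print(f"== {fname} ==")
        with open(fname) as f:
            for line in f:
                if line.strip():
                    print(line.strip())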
