2222from dataclasses import dataclass
2323import html .parser
2424import re
25+ import time
2526
2627
# Module-wide runtime settings, keyed by command line option name.
#   '--port'   : defaults to 3128 (presumably the port the proxy listens on -- confirm against the server setup code).
#   '--config' : defaults to '/dev/null' (presumably the path of a config file -- confirm against its reader).
#   '--debug'  : when truthy, debug_dump() writes request/response details to files under /tmp.
#   'server'   : starts out as None (presumably filled in with the running server instance -- confirm).
gMe = {
    '--port': 3128,
    '--config': '/dev/null',
    '--debug': False,
    'server': None
}
3234
@@ -105,6 +107,18 @@ class UrlReqResp:
105107 contentData : str = ""
106108
107109
def debug_dump(meta: dict, data: dict):
    """
    Dump the given meta and data dictionaries into a pair of files under /tmp,
    provided gMe['--debug'] is truthy; otherwise return without doing anything.

    The two files share a time.time() based tag in their names
    * /tmp/simpleproxy.<timeTag>.meta : one "key:value" entry per item in meta
    * /tmp/simpleproxy.<timeTag>.data : one "key:value" entry per item in data
    Each entry is surrounded by blank lines, so that entries stay visually apart.

    This is a best effort debugging aid. It gets called from within the request
    handling try blocks, so a failure to write the dump is reported on stdout,
    instead of being raised and thus turning a good response into an error.
    """
    if not gMe['--debug']:
        return
    timeTag = f"{time.time():0.12f}"
    try:
        for ext, entries in (('meta', meta), ('data', data)):
            # Fixed utf-8 encoding, so that the dump doesnt depend on the locale;
            # backslashreplace ensures that unencodable chars cant abort the dump.
            with open(f"/tmp/simpleproxy.{timeTag}.{ext}", 'w', encoding='utf-8', errors='backslashreplace') as f:
                for k, v in entries.items():
                    f.write(f"\n\n\n\n{k}:{v}\n\n\n\n")
    except OSError as exc:
        print(f"WARN:DebugDump:Failed:{exc}")
120+
121+
108122def validate_url (url : str , tag : str ):
109123 """
110124 Implement a re based filter logic on the specified url.
@@ -152,7 +166,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
152166 return gotVU
153167 try :
154168 hUA = ph .headers .get ('User-Agent' , 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:132.0) Gecko/20100101 Firefox/132.0' )
155- hAL = ph .headers .get ('Accept-Language' , "en-US,en" )
169+ hAL = ph .headers .get ('Accept-Language' , "en-US,en;q=0.9 " )
156170 hA = ph .headers .get ('Accept' , "text/html,*/*" )
157171 headers = {
158172 'User-Agent' : hUA ,
@@ -166,6 +180,7 @@ def handle_urlreq(ph: ProxyHandler, pr: urllib.parse.ParseResult, tag: str):
166180 contentData = response .read ().decode ('utf-8' )
167181 statusCode = response .status or 200
168182 contentType = response .getheader ('Content-Type' ) or 'text/html'
183+ debug_dump ({ 'url' : req .full_url , 'headers' : req .headers , 'ctype' : contentType }, { 'cdata' : contentData })
169184 return UrlReqResp (True , statusCode , "" , contentType , contentData )
170185 except Exception as exc :
171186 return UrlReqResp (False , 502 , f"WARN:{ tag } :Failed:{ exc } " )
@@ -283,6 +298,7 @@ def handle_urltext(ph: ProxyHandler, pr: urllib.parse.ParseResult):
283298 ph .send_header ('Access-Control-Allow-Origin' , '*' )
284299 ph .end_headers ()
285300 ph .wfile .write (textHtml .get_stripped_text ().encode ('utf-8' ))
301+ debug_dump ({ 'RawText' : 'yes' , 'StrippedText' : 'yes' }, { 'RawText' : textHtml .text , 'StrippedText' : textHtml .get_stripped_text () })
286302 except Exception as exc :
287303 ph .send_error (502 , f"WARN:UrlTextFailed:{ exc } " )
288304
@@ -336,6 +352,10 @@ def process_args(args: list[str]):
336352 iArg += 1
337353 gMe [cArg ] = ast .literal_eval (args [iArg ])
338354 iArg += 1
355+ case '--debug' :
356+ iArg += 1
357+ gMe [cArg ] = ast .literal_eval (args [iArg ])
358+ iArg += 1
339359 case _:
340360 gMe ['INTERNAL.ProcessArgs.Unknown' ].append (cArg )
341361 print (f"WARN:ProcessArgs:{ iArg } :IgnoringUnknownCommand:{ cArg } " )
0 commit comments