@@ -372,29 +372,28 @@ def collect_request_attributes(
372372 else :
373373 # old semconv v1.20.0
374374 if _report_old (sem_conv_opt_in_mode ):
375+ path_info = environ .get ("PATH_INFO" , "" )
376+ print ("Path info" , path_info )
375377 try :
376378 result [HTTP_URL ] = redact_url (
377379 wsgiref_util .request_uri (environ )
378380 )
379- except UnicodeEncodeError :
380- # The underlying wsgiref library seems to hardcode latin1 into this call
381- # This can cause issues for some characters and you can hit decode errors
382- path_info = quote (
383- environ .get ("PATH_INFO" , "" ),
384- safe = "/;=," ,
385- encoding = "utf-8" ,
386- errors = "replace" ,
381+ except UnicodeEncodeError as e :
382+ url = wsgiref_util .application_uri (environ )
383+ path = environ .get ("PATH_INFO" , "" )
384+ # Taken from repercent_broken_unicode function in django/utils/encoding
385+ repercent = quote (
386+ path [e .start : e .end ], safe = b"/#%[]=:;$&()+,!?*@'~"
387387 )
388- scheme = environ . get ( "wsgi.url_scheme" , "http" )
389- host = environ . get (
390- "HTTP_HOST" , environ .get ("SERVER_NAME" , "localhost" )
391- )
392- url = f" { scheme } :// { host } { path_info } "
393-
388+ path = path [: e . start ] + repercent . encode (). decode ( )
389+ # Most of this taken directly from original wsgiref library https://github.com/python/cpython/blob/bbe589f93ccaf32eb95fd9d1f8f3dc9a536e8db1/Lib/wsgiref/util.py#L61
390+ if not environ .get ("SCRIPT_NAME" ):
391+ url += path [ 1 :]
392+ else :
393+ url += path
394394 if environ .get ("QUERY_STRING" ):
395- url += f"?{ environ ['QUERY_STRING' ]} "
396-
397- result [HTTP_URL ] = redact_url (url )
395+ url += "?" + environ ["QUERY_STRING" ]
396+ result [HTTP_URL ] = url
398397
399398 remote_addr = environ .get ("REMOTE_ADDR" )
400399 if remote_addr :
0 commit comments