Skip to content

Commit b0a5c0a

Browse files
committed
fix: try to mock what underlying libraries are doing
1 parent 96c8d16 commit b0a5c0a

File tree

1 file changed

+16
-17
lines changed
  • instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi

1 file changed

+16
-17
lines changed

instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -372,29 +372,28 @@ def collect_request_attributes(
372372
else:
373373
# old semconv v1.20.0
374374
if _report_old(sem_conv_opt_in_mode):
375+
path_info = environ.get("PATH_INFO", "")
376+
print("Path info", path_info)
375377
try:
376378
result[HTTP_URL] = redact_url(
377379
wsgiref_util.request_uri(environ)
378380
)
379-
except UnicodeEncodeError:
380-
# The underlying wsgiref library seems to hardcode latin1 into this call
381-
# This can cause issues for some characters and you can hit decode errors
382-
path_info = quote(
383-
environ.get("PATH_INFO", ""),
384-
safe="/;=,",
385-
encoding="utf-8",
386-
errors="replace",
381+
except UnicodeEncodeError as e:
382+
url = wsgiref_util.application_uri(environ)
383+
path = environ.get("PATH_INFO", "")
384+
# Taken from the repercent_broken_unicode function in django/utils/encoding
385+
repercent = quote(
386+
path[e.start : e.end], safe=b"/#%[]=:;$&()+,!?*@'~"
387387
)
388-
scheme = environ.get("wsgi.url_scheme", "http")
389-
host = environ.get(
390-
"HTTP_HOST", environ.get("SERVER_NAME", "localhost")
391-
)
392-
url = f"{scheme}://{host}{path_info}"
393-
388+
path = path[: e.start] + repercent.encode().decode()
389+
# Most of this is taken directly from the original wsgiref library: https://github.com/python/cpython/blob/bbe589f93ccaf32eb95fd9d1f8f3dc9a536e8db1/Lib/wsgiref/util.py#L61
390+
if not environ.get("SCRIPT_NAME"):
391+
url += path[1:]
392+
else:
393+
url += path
394394
if environ.get("QUERY_STRING"):
395-
url += f"?{environ['QUERY_STRING']}"
396-
397-
result[HTTP_URL] = redact_url(url)
395+
url += "?" + environ["QUERY_STRING"]
396+
result[HTTP_URL] = url
398397

399398
remote_addr = environ.get("REMOTE_ADDR")
400399
if remote_addr:

0 commit comments

Comments
 (0)