diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py
index ecbc256287..7c16ff2b9b 100644
--- a/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py
@@ -221,6 +221,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
 import wsgiref.util as wsgiref_util
 from timeit import default_timer
 from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, TypeVar, cast
+from urllib.parse import quote
 
 from opentelemetry import context, trace
 from opentelemetry.instrumentation._semconv import (
@@ -371,7 +372,22 @@ def collect_request_attributes(
     else:
         # old semconv v1.20.0
         if _report_old(sem_conv_opt_in_mode):
-            result[HTTP_URL] = redact_url(wsgiref_util.request_uri(environ))
+            try:
+                url = wsgiref_util.request_uri(environ)
+            except UnicodeEncodeError:
+                # wsgiref percent-encodes PATH_INFO as latin-1 and fails on
+                # characters outside that range. Rebuild the URL the same way
+                # wsgiref.util.request_uri does, but quote the path as UTF-8:
+                # https://github.com/python/cpython/blob/bbe589f93ccaf32eb95fd9d1f8f3dc9a536e8db1/Lib/wsgiref/util.py#L61
+                url = wsgiref_util.application_uri(environ)
+                path = quote(environ.get("PATH_INFO", ""), safe="/;=,")
+                if not environ.get("SCRIPT_NAME"):
+                    url += path[1:]
+                else:
+                    url += path
+                if environ.get("QUERY_STRING"):
+                    url += "?" + environ["QUERY_STRING"]
+            result[HTTP_URL] = redact_url(url)
 
     remote_addr = environ.get("REMOTE_ADDR")
     if remote_addr:
diff --git a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py
index 5a6e2d21f7..108cdf95f6 100644
--- a/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py
+++ b/instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py
@@ -831,6 +831,36 @@ def test_remove_sensitive_params(self):
             expected.items(),
         )
 
+    def test_unicode_path_info_is_utf8_encoded(self):
+        self.environ["HTTP_HOST"] = "mock"
+        self.environ["PATH_INFO"] = "/заказ"
+        self.environ["QUERY_STRING"] = "foo=bar"
+
+        expected = {
+            HTTP_URL: "http://mock/%D0%B7%D0%B0%D0%BA%D0%B0%D0%B7?foo=bar",
+            NET_HOST_PORT: 80,
+        }
+
+        self.assertGreaterEqual(
+            otel_wsgi.collect_request_attributes(self.environ).items(),
+            expected.items(),
+        )
+
+    def test_unicode_path_info_keeps_trailing_segments(self):
+        self.environ["HTTP_HOST"] = "mock"
+        self.environ["PATH_INFO"] = "/заказ/42"
+        self.environ["QUERY_STRING"] = "foo=bar"
+
+        expected = {
+            HTTP_URL: "http://mock/%D0%B7%D0%B0%D0%BA%D0%B0%D0%B7/42?foo=bar",
+            NET_HOST_PORT: 80,
+        }
+
+        self.assertGreaterEqual(
+            otel_wsgi.collect_request_attributes(self.environ).items(),
+            expected.items(),
+        )
+
 
 class TestWsgiMiddlewareWithTracerProvider(WsgiTestBase):
     def validate_response(