Skip to content

Commit d8cb55d

Browse files
committed
fix: add support for non-latin1 characters in wsgi module
1 parent 77f3171 commit d8cb55d

File tree

2 files changed

+30
-1
lines changed

2 files changed

+30
-1
lines changed

instrumentation/opentelemetry-instrumentation-wsgi/src/opentelemetry/instrumentation/wsgi/__init__.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ def response_hook(span: Span, environ: WSGIEnvironment, status: str, response_he
219219

220220
import functools
221221
import wsgiref.util as wsgiref_util
222+
from urllib.parse import quote
222223
from timeit import default_timer
223224
from typing import TYPE_CHECKING, Any, Callable, Dict, Iterable, TypeVar, cast
224225

@@ -371,7 +372,20 @@ def collect_request_attributes(
371372
else:
372373
# old semconv v1.20.0
373374
if _report_old(sem_conv_opt_in_mode):
374-
result[HTTP_URL] = redact_url(wsgiref_util.request_uri(environ))
375+
try:
376+
result[HTTP_URL] = redact_url(wsgiref_util.request_uri(environ))
377+
except UnicodeEncodeError:
378+
# wsgiref_util.request_uri appears to hardcode latin1 when percent-encoding the request path.
379+
# Characters outside latin1 then raise UnicodeEncodeError, so fall back to quoting PATH_INFO as UTF-8.
380+
path_info = quote(environ.get("PATH_INFO", ""), safe="/;=,", encoding="utf-8", errors="replace")
381+
scheme = environ.get("wsgi.url_scheme", "http")
382+
host = environ.get("HTTP_HOST", environ.get("SERVER_NAME", "localhost"))
383+
url = f"{scheme}://{host}{path_info}"
384+
385+
if environ.get("QUERY_STRING"):
386+
url += f"?{environ['QUERY_STRING']}"
387+
388+
result[HTTP_URL] = redact_url(url)
375389

376390
remote_addr = environ.get("REMOTE_ADDR")
377391
if remote_addr:

instrumentation/opentelemetry-instrumentation-wsgi/tests/test_wsgi_middleware.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,21 @@ def test_remove_sensitive_params(self):
831831
expected.items(),
832832
)
833833

834+
def test_unicode_path_info_is_utf8_encoded(self):
835+
self.environ["HTTP_HOST"] = "mock"
836+
self.environ["PATH_INFO"] = "/заказ"
837+
self.environ["QUERY_STRING"] = "foo=bar"
838+
839+
expected = {
840+
HTTP_URL: "http://mock/%D0%B7%D0%B0%D0%BA%D0%B0%D0%B7?foo=bar",
841+
NET_HOST_PORT: 80,
842+
}
843+
844+
self.assertGreaterEqual(
845+
otel_wsgi.collect_request_attributes(self.environ).items(),
846+
expected.items(),
847+
)
848+
834849

835850
class TestWsgiMiddlewareWithTracerProvider(WsgiTestBase):
836851
def validate_response(

0 commit comments

Comments
 (0)