Skip to content

Commit 063051a

Browse files
fix(wsgi): handle django wsgi app being an iterator [backport 1.20] (#7077)
Backport 229378a from #6858 to 1.20. This change fixes #5325 by having the WSGI middleware handle cases when the wrapped iterable object is an iterator. This condition can arise when a Django app attempts to send its "request finished" signal, in which case it may cause connection leaks. Standard methods of distinguishing an iterable from its iterator, such as checking for the presence of __iter__ and __next__ methods, don't work in this case for unknown reasons. Instead of adding brittle special-case detection logic to the middleware, this new argument allows users to indicate when this is the case. ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. Co-authored-by: Emmett Butler <[email protected]>
1 parent ac0fdad commit 063051a

File tree

6 files changed

+122
-19
lines changed

6 files changed

+122
-19
lines changed

ddtrace/contrib/wsgi/wsgi.py

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,16 @@ class _DDWSGIMiddlewareBase(object):
5858
:param tracer: Tracer instance to use the middleware with. Defaults to the global tracer.
5959
:param int_config: Integration specific configuration object.
6060
:param pin: Set tracing metadata on a particular traced connection
61+
:param app_is_iterator: Boolean indicating whether the wrapped app is a Python iterator
6162
"""
6263

63-
def __init__(self, application, tracer, int_config, pin):
64-
# type: (Iterable, Tracer, Config, Pin) -> None
64+
def __init__(self, application, tracer, int_config, pin, app_is_iterator=False):
65+
# type: (Iterable, Tracer, Config, Pin, bool) -> None
6566
self.app = application
6667
self.tracer = tracer
6768
self._config = int_config
6869
self._pin = pin
70+
self.app_is_iterator = app_is_iterator
6971

7072
@property
7173
def _request_span_name(self):
@@ -88,7 +90,7 @@ def _response_span_name(self):
8890
def __call__(self, environ, start_response):
8991
# type: (Iterable, Callable) -> wrapt.ObjectProxy
9092
headers = get_request_headers(environ)
91-
closing_iterator = ()
93+
closing_iterable = ()
9294
not_blocked = True
9395
with core.context_with_data(
9496
"wsgi.__call__",
@@ -101,7 +103,7 @@ def __call__(self, environ, start_response):
101103
if core.get_item(HTTP_REQUEST_BLOCKED):
102104
status, headers, content = core.dispatch("wsgi.block.started", ctx, construct_url)[0][0]
103105
start_response(str(status), headers)
104-
closing_iterator = [content]
106+
closing_iterable = [content]
105107
not_blocked = False
106108

107109
def blocked_view():
@@ -113,17 +115,17 @@ def blocked_view():
113115
if not_blocked:
114116
core.dispatch("wsgi.request.prepare", ctx, start_response)
115117
try:
116-
closing_iterator = self.app(environ, ctx.get_item("intercept_start_response"))
118+
closing_iterable = self.app(environ, ctx.get_item("intercept_start_response"))
117119
except BaseException:
118120
core.dispatch("wsgi.app.exception", ctx)
119121
raise
120122
else:
121-
core.dispatch("wsgi.app.success", ctx, closing_iterator)
123+
core.dispatch("wsgi.app.success", ctx, closing_iterable)
122124
if core.get_item(HTTP_REQUEST_BLOCKED):
123125
_, _, content = core.dispatch("wsgi.block.started", ctx, construct_url)[0][0]
124-
closing_iterator = [content]
126+
closing_iterable = [content]
125127

126-
return core.dispatch("wsgi.request.complete", ctx, closing_iterator)[0][0]
128+
return core.dispatch("wsgi.request.complete", ctx, closing_iterable, self.app_is_iterator)[0][0]
127129

128130
def _traced_start_response(self, start_response, request_span, app_span, status, environ, exc_info=None):
129131
# type: (Callable, Span, Span, str, Dict, Any) -> None
@@ -203,15 +205,18 @@ class DDWSGIMiddleware(_DDWSGIMiddlewareBase):
203205
:param tracer: Tracer instance to use the middleware with. Defaults to the global tracer.
204206
:param span_modifier: Span modifier that can add tags to the root span.
205207
Defaults to using the request method and url in the resource.
208+
:param app_is_iterator: Boolean indicating whether the wrapped WSGI app is a Python iterator
206209
"""
207210

208211
_request_span_name = schematize_url_operation("wsgi.request", protocol="http", direction=SpanDirection.INBOUND)
209212
_application_span_name = "wsgi.application"
210213
_response_span_name = "wsgi.response"
211214

212-
def __init__(self, application, tracer=None, span_modifier=default_wsgi_span_modifier):
213-
# type: (Iterable, Optional[Tracer], Callable[[Span, Dict[str, str]], None]) -> None
214-
super(DDWSGIMiddleware, self).__init__(application, tracer or ddtrace.tracer, config.wsgi, None)
215+
def __init__(self, application, tracer=None, span_modifier=default_wsgi_span_modifier, app_is_iterator=False):
216+
# type: (Iterable, Optional[Tracer], Callable[[Span, Dict[str, str]], None], bool) -> None
217+
super(DDWSGIMiddleware, self).__init__(
218+
application, tracer or ddtrace.tracer, config.wsgi, None, app_is_iterator=app_is_iterator
219+
)
215220
self.span_modifier = span_modifier
216221

217222
def _traced_start_response(self, start_response, request_span, app_span, status, environ, exc_info=None):

ddtrace/tracing/trace_handlers.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,13 @@
2828

2929

3030
class _TracedIterable(wrapt.ObjectProxy):
31-
def __init__(self, wrapped, span, parent_span):
32-
super(_TracedIterable, self).__init__(wrapped)
31+
def __init__(self, wrapped, span, parent_span, wrapped_is_iterator=False):
32+
self._self_wrapped_is_iterator = wrapped_is_iterator
33+
if self._self_wrapped_is_iterator:
34+
super(_TracedIterable, self).__init__(wrapped)
35+
self._wrapped_iterator = iter(wrapped)
36+
else:
37+
super(_TracedIterable, self).__init__(iter(wrapped))
3338
self._self_span = span
3439
self._self_parent_span = parent_span
3540
self._self_span_finished = False
@@ -39,7 +44,10 @@ def __iter__(self):
3944

4045
def __next__(self):
4146
try:
42-
return next(self.__wrapped__)
47+
if self._self_wrapped_is_iterator:
48+
return next(self._wrapped_iterator)
49+
else:
50+
return next(self.__wrapped__)
4351
except StopIteration:
4452
self._finish_spans()
4553
raise
@@ -158,15 +166,15 @@ def _on_request_prepare(ctx, start_response):
158166
ctx.set_item("intercept_start_response", intercept_start_response)
159167

160168

161-
def _on_app_success(ctx, closing_iterator):
169+
def _on_app_success(ctx, closing_iterable):
162170
app_span = ctx.get_item("app_span")
163171
middleware = ctx.get_item("middleware")
164172
modifier = (
165173
middleware._application_call_modifier
166174
if hasattr(middleware, "_application_call_modifier")
167175
else middleware._application_span_modifier
168176
)
169-
modifier(app_span, ctx.get_item("environ"), closing_iterator)
177+
modifier(app_span, ctx.get_item("environ"), closing_iterable)
170178
app_span.finish()
171179

172180

@@ -179,7 +187,7 @@ def _on_app_exception(ctx):
179187
req_span.finish()
180188

181189

182-
def _on_request_complete(ctx, closing_iterator):
190+
def _on_request_complete(ctx, closing_iterable, app_is_iterator):
183191
middleware = ctx.get_item("middleware")
184192
req_span = ctx.get_item("req_span")
185193
# start flask.response span. This span will be finished after iter(result) is closed.
@@ -199,9 +207,9 @@ def _on_request_complete(ctx, closing_iterator):
199207
if hasattr(middleware, "_response_call_modifier")
200208
else middleware._response_span_modifier
201209
)
202-
modifier(resp_span, closing_iterator)
210+
modifier(resp_span, closing_iterable)
203211

204-
return _TracedIterable(iter(closing_iterator), resp_span, req_span)
212+
return _TracedIterable(closing_iterable, resp_span, req_span, wrapped_is_iterator=app_is_iterator)
205213

206214

207215
def _on_response_context_started(ctx):

docs/spelling_wordlist.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ doctest
7777
dogpile
7878
dogpile.cache
7979
dogstatsd
80+
dunder
8081
dsn
8182
elasticsearch
8283
elasticsearch1
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
features:
3+
- |
4+
wsgi: This change introduces the keyword argument app_is_iterator to the DDWSGIMiddleware constructor.
5+
It's provided as a workaround for an issue where the Datadog WSGI middleware would fail to handle WSGI
6+
apps that are not their own iterators. This condition can arise when a Django app attempts to send its
7+
"request finished" signal, in which case it may cause connection leaks. Standard methods of distinguishing
8+
an iterable from its iterator, such as checking for the presence of iter and next dunder methods, don't
9+
work in this case for unknown reasons. Instead of adding brittle special-case detection logic to the
10+
middleware, this new argument allows users to indicate when this is the case.

tests/.suitespec.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,7 @@
632632
"@contrib",
633633
"@appsec",
634634
"@asgi",
635+
"@wsgi",
635636
"@django",
636637
"@dbapi",
637638
"@pg",
@@ -1078,6 +1079,7 @@
10781079
"@tracing",
10791080
"@wsgi",
10801081
"tests/contrib/wsgi/*",
1082+
"tests/contrib/django/test_django_wsgi.py",
10811083
"tests/contrib/uwsgi/__init__.py",
10821084
"tests/snapshots/tests.contrib.wsgi.*"
10831085
],
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import logging
2+
import os
3+
import subprocess
4+
5+
import django
6+
from django.core.signals import request_finished
7+
from django.core.wsgi import get_wsgi_application
8+
from django.dispatch import receiver
9+
from django.http import HttpResponse
10+
import pytest
11+
12+
from ddtrace.contrib.wsgi import DDWSGIMiddleware
13+
from ddtrace.internal.compat import PY2
14+
from ddtrace.internal.compat import PY3
15+
from tests.webclient import Client
16+
17+
18+
filepath, extension = os.path.splitext(__file__)
19+
ROOT_URLCONF = os.path.basename(filepath)
20+
WSGI_APPLICATION = os.path.basename(filepath) + ".app"
21+
DEBUG = True
22+
SERVER_PORT = 8000
23+
SENTINEL_LOG = "request finished signal received"
24+
25+
log = logging.getLogger(__name__)
26+
27+
28+
@receiver(request_finished)
29+
def log_request_finished(*_, **__):
30+
log.warning(SENTINEL_LOG)
31+
32+
33+
def handler(_):
34+
return HttpResponse("Hello!")
35+
36+
37+
if PY3:
38+
from django.urls import path
39+
40+
urlpatterns = [path("", handler)]
41+
# it would be better to check for app_is_iterator programmatically, but Django WSGI apps behave like
42+
# iterators for the purpose of DDWSGIMiddleware despite not having both "__next__" and "__iter__" methods
43+
app = DDWSGIMiddleware(get_wsgi_application(), app_is_iterator=True)
44+
45+
46+
@pytest.mark.skipif(
47+
django.VERSION < (3, 0, 0) or PY2, reason="Older Django versions don't work with this use of django-admin"
48+
)
49+
def test_django_app_receives_request_finished_signal_when_app_is_ddwsgimiddleware():
50+
env = os.environ.copy()
51+
env.update(
52+
{
53+
"PYTHONPATH": os.path.dirname(os.path.abspath(__file__)) + ":" + env["PYTHONPATH"],
54+
"DJANGO_SETTINGS_MODULE": "test_django_wsgi",
55+
}
56+
)
57+
cmd = ["django-admin", "runserver", "--noreload", str(SERVER_PORT)]
58+
proc = subprocess.Popen(
59+
cmd,
60+
stdout=subprocess.PIPE,
61+
stderr=subprocess.PIPE,
62+
close_fds=True,
63+
env=env,
64+
)
65+
66+
client = Client("http://localhost:%d" % SERVER_PORT)
67+
client.wait()
68+
output = ""
69+
try:
70+
assert client.get("/").status_code == 200
71+
finally:
72+
try:
73+
_, output = proc.communicate(timeout=1)
74+
except subprocess.TimeoutExpired:
75+
proc.kill()
76+
_, output = proc.communicate()
77+
assert SENTINEL_LOG in str(output)

0 commit comments

Comments
 (0)