Skip to content

Commit 3567a03

Browse files
devmonkey22xrmx
andauthored
Fix tornado server (request) duration metric calculation (#3679)
* Fix `opentelemetry-instrumentation-tornado` server (request) duration metric calculation (#3486) * Update changelog * Linting * Add tornado metrics test_metrics_concurrent_requests test * Fix tornado test ms to sec conversion --------- Co-authored-by: Riccardo Magliocchetti <[email protected]>
1 parent c6b14fc commit 3567a03

File tree

4 files changed

+92
-4
lines changed

4 files changed

+92
-4
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313

1414
### Fixed
1515

16+
- `opentelemetry-instrumentation-tornado` Fix server (request) duration metric calculation
17+
([#3679](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3679))
1618
- `opentelemetry-instrumentation`: Avoid calls to `context.detach` with `None` token.
1719
([#3673](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3673))
1820

instrumentation/opentelemetry-instrumentation-tornado/src/opentelemetry/instrumentation/tornado/__init__.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def client_response_hook(span, future):
211211

212212
_logger = getLogger(__name__)
213213
_TraceContext = namedtuple("TraceContext", ["activation", "span", "token"])
214+
_HANDLER_STATE_KEY = "_otel_state_key"
214215
_HANDLER_CONTEXT_KEY = "_otel_trace_context_key"
215216
_OTEL_PATCHED_KEY = "_otel_patched_key"
216217

@@ -402,7 +403,8 @@ def _wrap(cls, method_name, wrapper):
402403
def _prepare(
403404
tracer, server_histograms, request_hook, func, handler, args, kwargs
404405
):
405-
server_histograms[_START_TIME] = default_timer()
406+
otel_handler_state = {_START_TIME: default_timer()}
407+
setattr(handler, _HANDLER_STATE_KEY, otel_handler_state)
406408

407409
request = handler.request
408410
if _excluded_urls.url_disabled(request.uri):
@@ -622,9 +624,9 @@ def _record_prepare_metrics(server_histograms, handler):
622624

623625

624626
def _record_on_finish_metrics(server_histograms, handler, error=None):
625-
elapsed_time = round(
626-
(default_timer() - server_histograms[_START_TIME]) * 1000
627-
)
627+
otel_handler_state = getattr(handler, _HANDLER_STATE_KEY, None) or {}
628+
start_time = otel_handler_state.get(_START_TIME, None) or default_timer()
629+
elapsed_time = round((default_timer() - start_time) * 1000)
628630

629631
response_size = int(handler._headers.get("Content-Length", 0))
630632
metric_attributes = _create_metric_attributes(handler)

instrumentation/opentelemetry-instrumentation-tornado/tests/test_metrics_instrumentation.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,11 @@
1313
# limitations under the License.
1414

1515

16+
import asyncio
1617
from timeit import default_timer
1718

19+
import tornado.testing
20+
1821
from opentelemetry.instrumentation.tornado import TornadoInstrumentor
1922
from opentelemetry.sdk.metrics.export import HistogramDataPoint
2023

@@ -165,6 +168,78 @@ def test_basic_metrics(self):
165168
),
166169
)
167170

171+
@tornado.testing.gen_test
172+
async def test_metrics_concurrent_requests(self):
173+
"""
174+
Test that metrics can handle concurrent requests and calculate in an async-safe way.
175+
"""
176+
req1 = self.http_client.fetch(self.get_url("/slow?duration=1.0"))
177+
req2 = self.http_client.fetch(self.get_url("/async"))
178+
await asyncio.gather(req1, req2)
179+
180+
metrics = self.get_sorted_metrics()
181+
self.assertEqual(len(metrics), 7)
182+
183+
client_duration = metrics[0]
184+
server_duration = metrics[4]
185+
self.assertEqual(client_duration.name, "http.client.duration")
186+
self.assertEqual(server_duration.name, "http.server.duration")
187+
188+
# Calculating duration requires tracking state via `_HANDLER_STATE_KEY`, so we want to make sure
189+
# duration is calculated properly per request, and doesn't affect concurrent requests.
190+
req1_client_duration_data_point = next(
191+
dp
192+
for dp in client_duration.data.data_points
193+
if "/slow" in dp.attributes.get("http.url")
194+
)
195+
req1_server_duration_data_point = next(
196+
dp
197+
for dp in server_duration.data.data_points
198+
if "/slow" in dp.attributes.get("http.target")
199+
)
200+
req2_client_duration_data_point = next(
201+
dp
202+
for dp in client_duration.data.data_points
203+
if "/async" in dp.attributes.get("http.url")
204+
)
205+
req2_server_duration_data_point = next(
206+
dp
207+
for dp in server_duration.data.data_points
208+
if "/async" in dp.attributes.get("http.target")
209+
)
210+
211+
# Server and client durations should be similar (adjusting for msecs vs secs)
212+
self.assertAlmostEqual(
213+
abs(
214+
req1_server_duration_data_point.sum / 1000.0
215+
- req1_client_duration_data_point.sum
216+
),
217+
0.0,
218+
delta=0.01,
219+
)
220+
self.assertAlmostEqual(
221+
abs(
222+
req2_server_duration_data_point.sum / 1000.0
223+
- req2_client_duration_data_point.sum
224+
),
225+
0.0,
226+
delta=0.01,
227+
)
228+
229+
# Make sure duration is roughly equivalent to expected (req1/slow) should be around 1 second
230+
self.assertAlmostEqual(
231+
req1_server_duration_data_point.sum / 1000.0,
232+
1.0,
233+
delta=0.1,
234+
msg="Should have been about 1 second",
235+
)
236+
self.assertAlmostEqual(
237+
req2_server_duration_data_point.sum / 1000.0,
238+
0.0,
239+
delta=0.1,
240+
msg="Should have been really short",
241+
)
242+
168243
def test_metric_uninstrument(self):
169244
self.fetch("/")
170245
TornadoInstrumentor().uninstrument()

instrumentation/opentelemetry-instrumentation-tornado/tests/tornado_test_app.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# pylint: disable=W0223,R0201
2+
import asyncio
23
import time
34

45
import tornado.web
@@ -106,6 +107,13 @@ def get(self):
106107
self.set_status(200)
107108

108109

110+
class SlowHandler(tornado.web.RequestHandler):
111+
async def get(self):
112+
with self.application.tracer.start_as_current_span("slow"):
113+
duration = float(self.get_argument("duration", "1.0"))
114+
await asyncio.sleep(duration)
115+
116+
109117
class RaiseHTTPErrorHandler(tornado.web.RequestHandler):
110118
def get(self):
111119
raise tornado.web.HTTPError(403)
@@ -133,6 +141,7 @@ def make_app(tracer):
133141
(r"/ping", HealthCheckHandler),
134142
(r"/test_custom_response_headers", CustomResponseHeaderHandler),
135143
(r"/raise_403", RaiseHTTPErrorHandler),
144+
(r"/slow", SlowHandler),
136145
(r"/echo_socket", EchoWebSocketHandler),
137146
]
138147
)

0 commit comments

Comments
 (0)