Skip to content

Fix tornado server (request) duration metric calculation #3679

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Fixed

- `opentelemetry-instrumentation-tornado` Fix server (request) duration metric calculation
([#3679](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3679))

## Version 1.36.0/0.57b0 (2025-07-29)

### Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def client_response_hook(span, future):

# Module-level logger for this instrumentation package.
_logger = getLogger(__name__)
# Bundles the span activation (context manager), the span itself, and the
# context-detach token for one instrumented request.
_TraceContext = namedtuple("TraceContext", ["activation", "span", "token"])
# Attribute name under which per-request OTel state (e.g. the request start
# time used for the duration metric) is stored on the RequestHandler instance.
_HANDLER_STATE_KEY = "_otel_state_key"
# Attribute name for the handler's _TraceContext — presumably set alongside
# the state key; usage not visible in this hunk, confirm in full source.
_HANDLER_CONTEXT_KEY = "_otel_trace_context_key"
# Marker attribute flagging an object as already patched — usage not visible
# in this hunk, confirm in full source.
_OTEL_PATCHED_KEY = "_otel_patched_key"

Expand Down Expand Up @@ -402,7 +403,8 @@ def _wrap(cls, method_name, wrapper):
def _prepare(
tracer, server_histograms, request_hook, func, handler, args, kwargs
):
server_histograms[_START_TIME] = default_timer()
otel_handler_state = {_START_TIME: default_timer()}
setattr(handler, _HANDLER_STATE_KEY, otel_handler_state)

request = handler.request
if _excluded_urls.url_disabled(request.uri):
Expand Down Expand Up @@ -622,9 +624,9 @@ def _record_prepare_metrics(server_histograms, handler):


def _record_on_finish_metrics(server_histograms, handler, error=None):
elapsed_time = round(
(default_timer() - server_histograms[_START_TIME]) * 1000
)
otel_handler_state = getattr(handler, _HANDLER_STATE_KEY, None) or {}
start_time = otel_handler_state.get(_START_TIME, None) or default_timer()
elapsed_time = round((default_timer() - start_time) * 1000)

response_size = int(handler._headers.get("Content-Length", 0))
metric_attributes = _create_metric_attributes(handler)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@
# limitations under the License.


import asyncio
from timeit import default_timer

import tornado.testing

from opentelemetry.instrumentation.tornado import TornadoInstrumentor
from opentelemetry.sdk.metrics.export import HistogramDataPoint

Expand Down Expand Up @@ -165,6 +168,78 @@ def test_basic_metrics(self):
),
)

@tornado.testing.gen_test
async def test_metrics_concurrent_requests(self):
    """
    Verify duration metrics are computed per request under concurrency.

    Issues two overlapping requests (one ~1 second, one near-instant) and
    checks each recorded server/client duration independently, proving the
    start time tracked via ``_HANDLER_STATE_KEY`` is per-handler state and
    one request's timing does not bleed into the other's.
    """
    req1 = self.http_client.fetch(self.get_url("/slow?duration=1.0"))
    req2 = self.http_client.fetch(self.get_url("/async"))
    await asyncio.gather(req1, req2)

    metrics = self.get_sorted_metrics()
    self.assertEqual(len(metrics), 7)

    client_duration = metrics[0]
    server_duration = metrics[4]
    self.assertEqual(client_duration.name, "http.client.duration")
    self.assertEqual(server_duration.name, "http.server.duration")

    def _find_data_point(metric, attribute, path_fragment):
        # Locate the data point whose URL/target attribute contains the
        # given path fragment. Default the attribute to "" so a missing
        # attribute yields a clean StopIteration failure rather than a
        # TypeError from `in None`.
        return next(
            dp
            for dp in metric.data.data_points
            if path_fragment in dp.attributes.get(attribute, "")
        )

    # Calculating duration requires tracking state via `_HANDLER_STATE_KEY`,
    # so make sure duration is calculated properly per request and doesn't
    # affect concurrent requests.
    req1_client_duration_data_point = _find_data_point(
        client_duration, "http.url", "/slow"
    )
    req1_server_duration_data_point = _find_data_point(
        server_duration, "http.target", "/slow"
    )
    req2_client_duration_data_point = _find_data_point(
        client_duration, "http.url", "/async"
    )
    req2_server_duration_data_point = _find_data_point(
        server_duration, "http.target", "/async"
    )

    # Server and client durations should be similar per request (server
    # sums are in milliseconds, client sums in seconds).
    # assertAlmostEqual with delta already compares abs(first - second),
    # so pass the two values directly instead of wrapping in abs(...).
    self.assertAlmostEqual(
        req1_server_duration_data_point.sum / 1000.0,
        req1_client_duration_data_point.sum,
        delta=0.01,
    )
    self.assertAlmostEqual(
        req2_server_duration_data_point.sum / 1000.0,
        req2_client_duration_data_point.sum,
        delta=0.01,
    )

    # Each request's duration should match its own expected latency:
    # req1 (/slow) should be around 1 second, req2 near-instant.
    self.assertAlmostEqual(
        req1_server_duration_data_point.sum / 1000.0,
        1.0,
        delta=0.1,
        msg="Should have been about 1 second",
    )
    self.assertAlmostEqual(
        req2_server_duration_data_point.sum / 1000.0,
        0.0,
        delta=0.1,
        msg="Should have been really short",
    )

def test_metric_uninstrument(self):
self.fetch("/")
TornadoInstrumentor().uninstrument()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylint: disable=W0223,R0201
import asyncio
import time

import tornado.web
Expand Down Expand Up @@ -106,6 +107,13 @@ def get(self):
self.set_status(200)


class SlowHandler(tornado.web.RequestHandler):
    """Test handler that simulates a slow endpoint.

    Sleeps for the number of seconds given by the ``duration`` query
    argument (default ``1.0``) inside a dedicated ``slow`` span.
    """

    async def get(self):
        with self.application.tracer.start_as_current_span("slow"):
            # Parse the requested delay inside the span so the span covers
            # the whole handler body, mirroring the instrumented timing.
            delay_seconds = float(self.get_argument("duration", "1.0"))
            await asyncio.sleep(delay_seconds)


class RaiseHTTPErrorHandler(tornado.web.RequestHandler):
    """Test handler that always fails with an HTTP 403 (Forbidden)."""

    def get(self):
        # Deliberately raise so instrumentation error paths get exercised.
        raise tornado.web.HTTPError(403)
Expand Down Expand Up @@ -133,6 +141,7 @@ def make_app(tracer):
(r"/ping", HealthCheckHandler),
(r"/test_custom_response_headers", CustomResponseHeaderHandler),
(r"/raise_403", RaiseHTTPErrorHandler),
(r"/slow", SlowHandler),
(r"/echo_socket", EchoWebSocketHandler),
]
)
Expand Down