Skip to content

Rewrite FastAPI instrumentor middleware stack to be failsafe #3664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Fixed

- `opentelemetry-instrumentation-fastapi`: Implemented failsafe middleware stack.
([#3664](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3664))

## Version 1.36.0/0.57b0 (2025-07-29)

### Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A

import fastapi
from starlette.applications import Starlette
from starlette.middleware.errors import ServerErrorMiddleware
from starlette.routing import Match
from starlette.types import ASGIApp

Expand All @@ -210,7 +209,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.metrics import MeterProvider, get_meter
from opentelemetry.semconv.attributes.http_attributes import HTTP_ROUTE
from opentelemetry.trace import TracerProvider, get_tracer
from opentelemetry.trace import Span, TracerProvider, get_tracer
from opentelemetry.util.http import (
get_excluded_urls,
parse_excluded_urls,
Expand Down Expand Up @@ -289,22 +288,33 @@ def instrument_app(
schema_url=_get_schema_url(sem_conv_opt_in_mode),
)

# Instead of using `app.add_middleware` we monkey patch `build_middleware_stack` to insert our middleware
# as the outermost middleware.
# Otherwise `OpenTelemetryMiddleware` would have unhandled exceptions tearing through it and would not be able
# to faithfully record what is returned to the client since it technically cannot know what `ServerErrorMiddleware` is going to do.

# In order to make traces available at any stage of the request
# processing - including exception handling - we wrap ourselves as
# the new, outermost middleware. However in order to prevent
# exceptions from user-provided hooks of tearing through, we wrap
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The wrapping is no longer here. I also don't think this comment is an improvement. It's now less clear than before that this approach ensures the correct http status code.

# them to return without failure unconditionally.
def build_middleware_stack(self: Starlette) -> ASGIApp:
def failsafe(func):
@functools.wraps(func)
def wrapper(span: Span, *args, **kwargs):
try:
func(span, *args, **kwargs)
except Exception as exc: # pylint: disable=W0718
span.record_exception(exc)

return wrapper

inner_server_error_middleware: ASGIApp = ( # type: ignore
self._original_build_middleware_stack() # type: ignore
)
otel_middleware = OpenTelemetryMiddleware(

return OpenTelemetryMiddleware(
inner_server_error_middleware,
excluded_urls=excluded_urls,
default_span_details=_get_default_span_details,
server_request_hook=server_request_hook,
client_request_hook=client_request_hook,
client_response_hook=client_response_hook,
server_request_hook=failsafe(server_request_hook),
client_request_hook=failsafe(client_request_hook),
client_response_hook=failsafe(client_response_hook),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wrapping should be in OpenTelemetryMiddleware itself.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in commit.

# Pass in tracer/meter to get __name__and __version__ of fastapi instrumentation
tracer=tracer,
meter=meter,
Expand All @@ -313,23 +323,6 @@ def build_middleware_stack(self: Starlette) -> ASGIApp:
http_capture_headers_sanitize_fields=http_capture_headers_sanitize_fields,
exclude_spans=exclude_spans,
)
# Wrap in an outer layer of ServerErrorMiddleware so that any exceptions raised in OpenTelemetryMiddleware
# are handled.
# This should not happen unless there is a bug in OpenTelemetryMiddleware, but if there is we don't want that
# to impact the user's application just because we wrapped the middlewares in this order.
if isinstance(
inner_server_error_middleware, ServerErrorMiddleware
): # usually true
outer_server_error_middleware = ServerErrorMiddleware(
app=otel_middleware,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not clear to me why this is being removed or how this relates to the added tests. If you keep this, do the tests fail?

I'm guessing this is meant to fix #3637 but I don't see how this PR does or how that's being tested.

Copy link
Contributor Author

@outergod outergod Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not related to #3637 but you are correct, this should probably stay as-is. No effect on the tests either way. Addressed in commit.

else:
# Something else seems to have patched things, or maybe Starlette changed.
# Just create a default ServerErrorMiddleware.
outer_server_error_middleware = ServerErrorMiddleware(
app=otel_middleware
)
return outer_server_error_middleware

app._original_build_middleware_stack = app.build_middleware_stack
app.build_middleware_stack = types.MethodType(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@
from opentelemetry.semconv._incubating.attributes.net_attributes import (
NET_HOST_PORT,
)
from opentelemetry.semconv.attributes.exception_attributes import (
EXCEPTION_TYPE,
)
from opentelemetry.semconv.attributes.http_attributes import (
HTTP_REQUEST_METHOD,
HTTP_RESPONSE_STATUS_CODE,
Expand Down Expand Up @@ -1877,3 +1880,126 @@ def test_custom_header_not_present_in_non_recording_span(self):
self.assertEqual(200, resp.status_code)
span_list = self.memory_exporter.get_finished_spans()
self.assertEqual(len(span_list), 0)


class TestTraceableExceptionHandling(TestBase):
"""Tests to ensure FastAPI exception handlers are only executed once and with a valid context"""

def setUp(self):
super().setUp()

self.app = fastapi.FastAPI()

otel_fastapi.FastAPIInstrumentor().instrument_app(self.app)
self.client = TestClient(self.app)
self.tracer = self.tracer_provider.get_tracer(__name__)
self.executed = 0
self.request_trace_id = None
self.error_trace_id = None

def tearDown(self) -> None:
super().tearDown()
with self.disable_logging():
otel_fastapi.FastAPIInstrumentor().uninstrument_app(self.app)

def test_error_handler_context(self):
"""OTEL tracing contexts must be available during error handler execution"""

@self.app.exception_handler(Exception)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please test if the exception and http status code are recorded on the span in this case.

async def _(*_):
self.error_trace_id = (
trace.get_current_span().get_span_context().trace_id
)

@self.app.get("/foobar")
async def _():
self.request_trace_id = (
trace.get_current_span().get_span_context().trace_id
)
raise UnhandledException("Test Exception")

try:
self.client.get(
"/foobar",
)
except Exception: # pylint: disable=W0718
pass

self.assertIsNotNone(self.request_trace_id)
self.assertEqual(self.request_trace_id, self.error_trace_id)

def test_error_handler_side_effects(self):
"""FastAPI default exception handlers (aka error handlers) must be executed exactly once per exception"""

@self.app.exception_handler(Exception)
async def _(*_):
self.executed += 1

@self.app.get("/foobar")
async def _():
raise UnhandledException("Test Exception")

try:
self.client.get(
"/foobar",
)
except Exception: # pylint: disable=W0718
pass

self.assertEqual(self.executed, 1)


class TestFailsafeHooks(TestBase):
"""Tests to ensure FastAPI instrumentation hooks don't tear through"""

def setUp(self):
super().setUp()

self.app = fastapi.FastAPI()

@self.app.get("/foobar")
async def _():
return {"message": "Hello World"}

def failing_hook(*_):
raise UnhandledException("Hook Exception")

otel_fastapi.FastAPIInstrumentor().instrument_app(
self.app,
server_request_hook=failing_hook,
client_request_hook=failing_hook,
client_response_hook=failing_hook,
)
self.client = TestClient(self.app)

def tearDown(self) -> None:
super().tearDown()
with self.disable_logging():
otel_fastapi.FastAPIInstrumentor().uninstrument_app(self.app)

def test_failsafe_hooks(self):
"""Crashing hooks must not tear through"""
resp = self.client.get(
"/foobar",
)

self.assertEqual(200, resp.status_code)

def test_failsafe_error_recording(self):
"""Failing hooks must record the exception on the span"""
self.client.get(
"/foobar",
)

spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 3)
span = spans[0]
self.assertEqual(len(span.events), 1)
event = span.events[0]
self.assertEqual(event.name, "exception")
assert event.attributes is not None
self.assertEqual(
event.attributes.get(EXCEPTION_TYPE),
f"{__name__}.UnhandledException",
)