Skip to content

Commit 856c3ec

Browse files
authored
providers-fab: Handle database errors in cleanup_session_middleware Session.remove() (#62336)
* providers-fab: Handle database errors in cleanup_session_middleware When Session.remove() is called in the finally block of cleanup_session_middleware, it may raise an OperationalError if the underlying database connection is already dead (e.g., MySQL 'Server has gone away', Aurora failover, network timeout). The unhandled exception propagates as a 500 Internal Server Error to the client, even when the original request succeeded. This commit wraps Session.remove() in a try-except block that catches and logs the error as a warning, consistent with how session cleanup is handled elsewhere in Airflow. Fixes follow-up to #61480. * providers-fab: fix logger reference in session cleanup Define module logger for cleanup warnings and drop unused test response assignment to satisfy ruff. * providers-fab: fix cleanup middleware checks Move logger definition below imports to satisfy static checks and assert warning logging in session cleanup tests so failures are surfaced clearly.
1 parent fca919a commit 856c3ec

File tree

2 files changed

+88
-1
lines changed

2 files changed

+88
-1
lines changed

providers/fab/src/airflow/providers/fab/auth_manager/fab_auth_manager.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# under the License.
1818
from __future__ import annotations
1919

20+
import logging
2021
import warnings
2122
from contextlib import suppress
2223
from functools import cached_property
@@ -121,6 +122,8 @@
121122
RESOURCE_ASSET_ALIAS,
122123
)
123124

125+
log = logging.getLogger(__name__)
126+
124127

125128
_MAP_DAG_ACCESS_ENTITY_TO_FAB_RESOURCE_TYPE: dict[DagAccessEntity, tuple[str, ...]] = {
126129
DagAccessEntity.AUDIT_LOG: (RESOURCE_AUDIT_LOG,),
@@ -240,7 +243,10 @@ async def cleanup_session_middleware(request, call_next):
240243
from airflow import settings
241244

242245
if settings.Session:
243-
settings.Session.remove()
246+
try:
247+
settings.Session.remove()
248+
except Exception:
249+
log.warning("Failed to remove session during cleanup", exc_info=True)
244250

245251
app.mount("/", WSGIMiddleware(flask_app))
246252

providers/fab/tests/unit/fab/auth_manager/test_fab_auth_manager.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,3 +1104,84 @@ def test_session_cleanup_middleware_on_fastapi_route(self, mock_create_app):
11041104

11051105
# Verify Session.remove() was called
11061106
mock_session.remove.assert_called()
1107+
1108+
1109+
class TestFabAuthManagerSessionCleanupErrorHandling:
1110+
"""Test that cleanup_session_middleware handles Session.remove() failures gracefully.
1111+
1112+
When the underlying database connection is dead (e.g., MySQL 'Server has gone away',
1113+
PostgreSQL connection timeout), Session.remove() internally attempts a ROLLBACK which
1114+
raises an OperationalError. The middleware should catch and log this error instead of
1115+
propagating it as a 500 Internal Server Error to the client.
1116+
1117+
See: https://github.com/apache/airflow/issues/XXXXX
1118+
"""
1119+
1120+
@mock.patch("airflow.providers.fab.auth_manager.fab_auth_manager.create_app")
1121+
def test_session_remove_db_error_does_not_propagate(self, mock_create_app):
1122+
"""When Session.remove() raises OperationalError, request should still succeed.
1123+
1124+
Simulates MySQL 'Server has gone away' or similar DB errors during session
1125+
cleanup. The middleware should catch the exception and log a warning.
1126+
"""
1127+
from unittest.mock import patch
1128+
1129+
from fastapi.testclient import TestClient
1130+
from sqlalchemy.exc import OperationalError
1131+
1132+
mock_flask_app = MagicMock()
1133+
mock_create_app.return_value = mock_flask_app
1134+
1135+
auth_manager = FabAuthManager()
1136+
fastapi_app = auth_manager.get_fastapi_app()
1137+
1138+
client = TestClient(fastapi_app, raise_server_exceptions=False)
1139+
1140+
with (
1141+
patch("airflow.settings.Session") as mock_session,
1142+
patch("airflow.providers.fab.auth_manager.fab_auth_manager.log") as mock_log,
1143+
):
1144+
# Simulate MySQL 'Server has gone away' error on Session.remove()
1145+
mock_session.remove.side_effect = OperationalError(
1146+
"ROLLBACK", {}, Exception("(2006, 'Server has gone away')")
1147+
)
1148+
1149+
response = client.get("/users/list/")
1150+
1151+
# The request should NOT get a 500 from the middleware error
1152+
# (it may get other status codes from the mock Flask app, but not
1153+
# an unhandled exception from cleanup_session_middleware)
1154+
mock_session.remove.assert_called()
1155+
mock_log.warning.assert_called()
1156+
assert response is not None
1157+
1158+
@mock.patch("airflow.providers.fab.auth_manager.fab_auth_manager.create_app")
1159+
def test_session_remove_generic_error_does_not_propagate(self, mock_create_app):
1160+
"""Any exception from Session.remove() should be caught during cleanup.
1161+
1162+
This covers edge cases beyond OperationalError, such as AttributeError
1163+
when the session registry is in an unexpected state.
1164+
"""
1165+
from unittest.mock import patch
1166+
1167+
from fastapi.testclient import TestClient
1168+
1169+
mock_flask_app = MagicMock()
1170+
mock_create_app.return_value = mock_flask_app
1171+
1172+
auth_manager = FabAuthManager()
1173+
fastapi_app = auth_manager.get_fastapi_app()
1174+
1175+
client = TestClient(fastapi_app, raise_server_exceptions=False)
1176+
1177+
with (
1178+
patch("airflow.settings.Session") as mock_session,
1179+
patch("airflow.providers.fab.auth_manager.fab_auth_manager.log") as mock_log,
1180+
):
1181+
mock_session.remove.side_effect = RuntimeError("unexpected session error")
1182+
1183+
# Should not raise - the middleware catches all exceptions from Session.remove()
1184+
response = client.get("/users/list/")
1185+
mock_session.remove.assert_called()
1186+
mock_log.warning.assert_called()
1187+
assert response is not None

0 commit comments

Comments
 (0)