diff --git a/changelog.d/19090.bugfix b/changelog.d/19090.bugfix new file mode 100644 index 00000000000..077dafcbf89 --- /dev/null +++ b/changelog.d/19090.bugfix @@ -0,0 +1 @@ +Fix lost logcontext warnings from timeouts in sync and requests made by Synapse itself. diff --git a/synapse/logging/context.py b/synapse/logging/context.py index 6a4425ff1de..45c83f0d61d 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -896,7 +896,7 @@ def run_in_background( # If the function messes with logcontexts, we can assume it follows the Synapse # logcontext rules (Rules for functions returning awaitables: "If the awaitable # is already complete, the function returns with the same logcontext it started - # with."). If it function doesn't touch logcontexts at all, we can also assume + # with."). If the function doesn't touch logcontexts at all, we can also assume # the logcontext is unchanged. # # Either way, the function should have maintained the calling logcontext, so we @@ -905,11 +905,21 @@ def run_in_background( # to reset the logcontext to the sentinel logcontext as that would run # immediately (remember our goal is to maintain the calling logcontext when we # return). - logcontext_debug_logger.debug( - "run_in_background(%s): deferred already completed and the function should have maintained the logcontext %s", - instance_id, - calling_context, - ) + if current_context() != calling_context: + logcontext_error( + "run_in_background(%s): deferred already completed but the function did not maintain the calling logcontext %s (found %s)" + % ( + instance_id, + calling_context, + current_context(), + ) + ) + else: + logcontext_debug_logger.debug( + "run_in_background(%s): deferred already completed and the function should have maintained the calling logcontext %s", + instance_id, + calling_context, + ) return d # Since the function we called may follow the Synapse logcontext rules (Rules for diff --git a/synapse/util/async_helpers.py b/synapse/util/async_helpers.py index c568b377d24..99e899d1ef9 100644 --- a/synapse/util/async_helpers.py +++ b/synapse/util/async_helpers.py @@ -808,7 +808,8 @@ def time_it_out() -> None: timed_out[0] = True try: - deferred.cancel() + with PreserveLoggingContext(): + deferred.cancel() except Exception: # if we throw any exception it'll break time outs logger.exception("Canceller failed during timeout") diff --git a/synapse/util/clock.py b/synapse/util/clock.py index 6557582629e..5b59cef60a2 100644 --- a/synapse/util/clock.py +++ b/synapse/util/clock.py @@ -266,7 +266,7 @@ def wrapped_callback(*args: Any, **kwargs: Any) -> None: # We use `PreserveLoggingContext` to prevent our new `call_later` # logcontext from finishing as soon as we exit this function, in case `f` # returns an awaitable/deferred which would continue running and may try to - # restore the `loop_call` context when it's done (because it's trying to + # restore the `call_later` context when it's done (because it's trying to # adhere to the Synapse logcontext rules.) # # This also ensures that we return to the `sentinel` context when we exit diff --git a/tests/unittest.py b/tests/unittest.py index 1007f404561..049a92caaaf 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -341,6 +341,9 @@ def logcontext_clean(target: TV) -> TV: """ def logcontext_error(msg: str) -> NoReturn: + # Log so we can still see it in the logs like normal + logger.warning(msg) + # But also fail the test raise AssertionError("logcontext error: %s" % (msg)) patcher = patch("synapse.logging.context.logcontext_error", new=logcontext_error) diff --git a/tests/util/test_async_helpers.py b/tests/util/test_async_helpers.py index a02a2f0cef3..8fbee12fb90 100644 --- a/tests/util/test_async_helpers.py +++ b/tests/util/test_async_helpers.py @@ -45,7 +45,7 @@ ) from tests.server import get_clock -from tests.unittest import TestCase +from tests.unittest import TestCase, logcontext_clean logger = logging.getLogger(__name__) @@ -198,7 +198,12 @@ def canceller(_d: Deferred) -> None: self.failureResultOf(timing_out_d, defer.TimeoutError) - async def test_logcontext_is_preserved_on_cancellation(self) -> None: + @logcontext_clean + async def test_logcontext_is_preserved_on_timeout_cancellation(self) -> None: + """ + Test that the logcontext is preserved when we timeout and the deferred is + cancelled. + """ # Sanity check that we start in the sentinel context self.assertEqual(current_context(), SENTINEL_CONTEXT) @@ -270,6 +275,65 @@ def mark_was_cancelled(res: Failure) -> None: # Back to the sentinel context self.assertEqual(current_context(), SENTINEL_CONTEXT) + @logcontext_clean + async def test_logcontext_is_not_lost_when_awaiting_on_timeout_cancellation( + self, + ) -> None: + """ + Test that the logcontext isn't lost when we `await make_deferred_yieldable(...)` + the deferred to complete/timeout and it times out. + """ + + # Sanity check that we start in the sentinel context + self.assertEqual(current_context(), SENTINEL_CONTEXT) + + # Create a deferred which we will never complete + incomplete_d: Deferred = Deferred() + + async def competing_task() -> None: + with LoggingContext( + name="competing", server_name="test_server" + ) as context_competing: + timing_out_d = timeout_deferred( + deferred=incomplete_d, + timeout=1.0, + clock=self.clock, + ) + self.assertNoResult(timing_out_d) + # We should still be in the logcontext we started in + self.assertIs(current_context(), context_competing) + + # Mimic the normal use case to wait for the work to complete or timeout. + # + # In this specific test, we expect the deferred to timeout and raise an + # exception at this point. + await make_deferred_yieldable(timing_out_d) + + self.fail( + "We should not make it to this point as the `timing_out_d` should have been cancelled" + ) + + d = defer.ensureDeferred(competing_task()) + + # Still in the sentinel context + self.assertEqual(current_context(), SENTINEL_CONTEXT) + + # Pump until we trigger the timeout + self.reactor.pump( + # We only need to pump `1.0` (seconds) as we set + # `timeout_deferred(timeout=1.0)` above + (1.0,) + ) + + # Still in the sentinel context + self.assertEqual(current_context(), SENTINEL_CONTEXT) + + # We expect a failure due to the timeout + self.failureResultOf(d, defer.TimeoutError) + + # Back to the sentinel context at the end of the day + self.assertEqual(current_context(), SENTINEL_CONTEXT) + class _TestException(Exception): # pass