-
Notifications
You must be signed in to change notification settings - Fork 404
Fix lost logcontext when using timeout_deferred(...)
#19090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
5eea20a
6be53d8
9478194
33776c0
4b9441e
630792c
73080af
5179ea3
202906e
0325f97
18ae299
a31eadd
71d490c
8328e1f
140bb59
9aa4919
7bb2ffc
4e576b7
a782d04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix lost logcontext warnings from timeouts in sync and requests made by Synapse itself. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -808,7 +808,8 @@ def time_it_out() -> None: | |
| timed_out[0] = True | ||
|
|
||
| try: | ||
| deferred.cancel() | ||
| with PreserveLoggingContext(): | ||
| deferred.cancel() | ||
|
Comment on lines
+811
to
+812
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the main fix! See the Deferred callbacks section of our logcontext docs for more info (specifically using solution 2). Heads-up, I wrote the docs too so it's my assumptions/understanding all the way down. Apply your own scrutiny.
Comment on lines
+811
to
+812
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In an ideal world, I think it should be possible to call the deferred callback/errbacks/cancel with some logcontext. I spent too much time trying to figure out the intricacies here and trying to use solution 3 from the Deferred callbacks docs but wasn't successful. It makes me question if what I wrote there is correct in the first place 🤔 The problem is that calling I can't tell if the problem is a) our function just isn't following logcontext rules (and how to resolve that well) or b) we should Instead of banging my head against this more, I've opted to go for the simple route |
||
| except Exception: # if we throw any exception it'll break time outs | ||
| logger.exception("Canceller failed during timeout") | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -266,7 +266,7 @@ def wrapped_callback(*args: Any, **kwargs: Any) -> None: | |
| # We use `PreserveLoggingContext` to prevent our new `call_later` | ||
| # logcontext from finishing as soon as we exit this function, in case `f` | ||
| # returns an awaitable/deferred which would continue running and may try to | ||
| # restore the `loop_call` context when it's done (because it's trying to | ||
| # restore the `call_later` context when it's done (because it's trying to | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Copy/paste typo |
||
| # adhere to the Synapse logcontext rules.) | ||
| # | ||
| # This also ensures that we return to the `sentinel` context when we exit | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -341,6 +341,9 @@ def logcontext_clean(target: TV) -> TV: | |||||||
| """ | ||||||||
|
|
||||||||
| def logcontext_error(msg: str) -> NoReturn: | ||||||||
| # Log so we can still see it in the logs like normal | ||||||||
| logger.warning(msg) | ||||||||
|
Comment on lines
+344
to
+345
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Quality of life so these the logcontext errors appear in the logs (
synapse/synapse/logging/context.py Lines 105 to 107 in 3b59ac3
Perhaps it would be even better if we just called the original implementation alongside raising the |
||||||||
| # But also fail the test | ||||||||
| raise AssertionError("logcontext error: %s" % (msg)) | ||||||||
|
|
||||||||
| patcher = patch("synapse.logging.context.logcontext_error", new=logcontext_error) | ||||||||
|
|
||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,7 +45,7 @@ | |
| ) | ||
|
|
||
| from tests.server import get_clock | ||
| from tests.unittest import TestCase | ||
| from tests.unittest import TestCase, logcontext_clean | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -198,7 +198,12 @@ def canceller(_d: Deferred) -> None: | |
|
|
||
| self.failureResultOf(timing_out_d, defer.TimeoutError) | ||
|
|
||
| async def test_logcontext_is_preserved_on_cancellation(self) -> None: | ||
| @logcontext_clean | ||
| async def test_logcontext_is_preserved_on_timeout_cancellation(self) -> None: | ||
| """ | ||
| Test that the logcontext is preserved when we timeout and the deferred is | ||
| cancelled. | ||
| """ | ||
| # Sanity check that we start in the sentinel context | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
|
|
@@ -270,6 +275,65 @@ def mark_was_cancelled(res: Failure) -> None: | |
| # Back to the sentinel context | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
| @logcontext_clean | ||
| async def test_logcontext_is_not_lost_when_awaiting_on_timeout_cancellation( | ||
| self, | ||
| ) -> None: | ||
|
Comment on lines
+279
to
+281
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test reproduces the And now passes with the fix introduced in this PR. |
||
| """ | ||
| Test that the logcontext isn't lost when we `await make_deferred_yieldable(...)` | ||
| the deferred to complete/timeout and it times out. | ||
| """ | ||
|
|
||
| # Sanity check that we start in the sentinel context | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
| # Create a deferred which we will never complete | ||
| incomplete_d: Deferred = Deferred() | ||
|
|
||
| async def competing_task() -> None: | ||
| with LoggingContext( | ||
| name="competing", server_name="test_server" | ||
| ) as context_competing: | ||
| timing_out_d = timeout_deferred( | ||
| deferred=incomplete_d, | ||
| timeout=1.0, | ||
| clock=self.clock, | ||
| ) | ||
| self.assertNoResult(timing_out_d) | ||
| # We should still be in the logcontext we started in | ||
| self.assertIs(current_context(), context_competing) | ||
|
|
||
| # Mimic the normal use case to wait for the work to complete or timeout. | ||
| # | ||
| # In this specific test, we expect the deferred to timeout and raise an | ||
| # exception at this point. | ||
| await make_deferred_yieldable(timing_out_d) | ||
|
|
||
| self.fail( | ||
| "We should not make it to this point as the `timing_out_d` should have been cancelled" | ||
| ) | ||
|
|
||
| d = defer.ensureDeferred(competing_task()) | ||
|
|
||
| # Still in the sentinel context | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
| # Pump until we trigger the timeout | ||
| self.reactor.pump( | ||
| # We only need to pump `1.0` (seconds) as we set | ||
| # `timeout_deferred(timeout=1.0)` above | ||
| (1.0,) | ||
| ) | ||
|
|
||
| # Still in the sentinel context | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
| # We expect a failure due to the timeout | ||
| self.failureResultOf(d, defer.TimeoutError) | ||
|
|
||
| # Back to the sentinel context at the end of the day | ||
| self.assertEqual(current_context(), SENTINEL_CONTEXT) | ||
|
|
||
|
|
||
| class _TestException(Exception): # | ||
| pass | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Extra protection when our assumption is wrong.
This did happen for the case we're fixing.