1 change: 1 addition & 0 deletions changelog.d/18900.misc
@@ -0,0 +1 @@
Better explain how we manage the logcontext in `run_in_background(...)` and `run_as_background_process(...)`.
17 changes: 17 additions & 0 deletions docs/log_contexts.md
@@ -59,6 +59,23 @@ def do_request_handling():
logger.debug("phew")
```

### The `sentinel` context

The default context is `synapse.logging.context.SENTINEL_CONTEXT`, which is an empty
sentinel value to represent the root context. This is what is used when there is no
other context set. The phrase "clear/reset the logcontext" means to set the current
context to the `sentinel` context.

No CPU/database usage metrics are recorded against the `sentinel` context.

Ideally, nothing from the Synapse homeserver would be logged against the `sentinel`
context, as we want to know which server the logs came from. In practice, this is not
always the case yet, especially outside of request handling.

Whenever yielding control back to the reactor (event loop), the `sentinel` logcontext is
set to avoid leaking the current logcontext into the reactor, where it would
erroneously get attached to the next operation picked up by the event loop.
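
To make this rule concrete, here is a minimal, illustrative sketch (the request
logcontext is assumed to already be active, and the "control returns to the reactor"
step is elided; in real code, helpers such as `make_deferred_yieldable(...)` and
`PreserveLoggingContext` do this bookkeeping for you):

```python
from synapse.logging.context import (
    SENTINEL_CONTEXT,
    current_context,
    set_current_context,
)


def yield_to_reactor() -> None:
    # Assume some request logcontext is currently active.
    original_context = current_context()

    # "Clear/reset the logcontext": make the sentinel context current before
    # handing control back to the reactor, so whatever the reactor picks up
    # next is not wrongly attributed to our request.
    set_current_context(SENTINEL_CONTEXT)
    try:
        ...  # control returns to the reactor here
    finally:
        # Restore our own logcontext once we are running again.
        set_current_context(original_context)
```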
Member:
i.e. the event loop / reactor code runs in the sentinel context. It's only when it calls into application code that a context gets activated. This means the reactor should be started in the sentinel context, and any time an awaitable yields back to the reactor it should reset the context to be the sentinel

Contributor Author:
I've incorporated your "in other words" example into the sentinel docs ✅


## Using logcontexts with awaitables

Awaitables break the linear flow of code so that there is no longer a single entry point
194 changes: 117 additions & 77 deletions synapse/logging/context.py
@@ -227,7 +227,16 @@ class ContextRequest:


class _Sentinel:
"""Sentinel to represent the root context"""
"""
Sentinel to represent the root context

This should only be used for tasks outside of Synapse, like when we yield control
back to the Twisted reactor (event loop), so we don't leak the current logging
context to other tasks that are scheduled next in the event loop.

Nothing from the Synapse homeserver should be logged with the sentinel context, i.e.
we should always know which server the logs are coming from.
Comment on lines +237 to +238
Contributor Author:
This is aspirational since we currently go against this in a few places. This is being addressed in #18870

"""

__slots__ = ["previous_context", "finished", "request", "tag"]

@@ -784,6 +793,14 @@ def run_in_background(
return from the function, and that the sentinel context is set once the
deferred returned by the function completes.

Contributor Author:
run_in_background (or run_as_background_process if we're not going to later wait on the result)

-- @erikjohnston, https://github.com/element-hq/synapse/pull/17884/files#r1822356809

Is this distinction actually true?

It seems like both can be used to wait for a result. Both need to be used with make_deferred_yieldable(...).

run_as_background_process just adds some metrics around what's being run.

Member:
The difference is:

  • run_in_background makes the function run with the context of the calling function. This then needs to be awaited on later within the same context to ensure tracing works correctly.
  • run_as_background_process starts the function with a new context, plus measures the resource usage

Contributor Author (@MadLittleMods, Sep 9, 2025):
For the caller, they interact the same (as far as I can tell):

  • Both preserve the calling context
  • Both return a Deferred that does not follow the synapse logcontext rules, which means they need to be used with make_deferred_yieldable to get access to the result.

The only difference is internal as you pointed out:

  • run_in_background makes the function run with the context of the calling function.
  • run_as_background_process starts the function with a new context, plus measures the resource usage

But I think this already answers the question in this thread: the off-the-cuff comment on the distinction wasn't accurate ⏩
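
For reference, a rough sketch of the caller-side pattern being discussed (the `fetch_thing` helper is hypothetical; both `run_in_background` and `run_as_background_process` are used the same way from the caller's point of view):

```python
from synapse.logging.context import make_deferred_yieldable, run_in_background


async def fetch_thing(arg: str) -> str:
    # Hypothetical stand-in for some real async work.
    return f"result for {arg}"


async def handler() -> None:
    # Kick off the work without awaiting it yet; the calling logcontext is
    # preserved across this call.
    d = run_in_background(fetch_thing, "some-arg")

    # ... do other work under the current logcontext ...

    # The returned Deferred does not follow the Synapse logcontext rules, so
    # wrap it in make_deferred_yieldable(...) before awaiting it to keep the
    # caller's logcontext intact.
    result = await make_deferred_yieldable(d)
```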

To explain how the log contexts work here:
- When this function is called, the current context is stored ("original"), we kick
off the background task, and we restore that original context before returning
- When the background task finishes, we don't want to leak our context into the
reactor which would erroneously get attached to the next operation picked up by
the event loop. We add a callback to the deferred which will clear the logging
context after it finishes and yields control back to the reactor.

Useful for wrapping functions that return a deferred or coroutine, which you don't
yield or await on (for instance because you want to pass it to
deferred.gatherResults()).
@@ -795,54 +812,62 @@ def run_in_background(
`f` doesn't raise any deferred exceptions, otherwise a scary-looking
CRITICAL error about an unhandled error will be logged without much
indication about where it came from.
"""
current = current_context()
try:
res = f(*args, **kwargs)
except Exception:
# the assumption here is that the caller doesn't want to be disturbed
# by synchronous exceptions, so let's turn them into Failures.
return defer.fail()

# `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain
# value. Convert it to a `Deferred`.
d: "defer.Deferred[R]"
if isinstance(res, typing.Coroutine):
# Wrap the coroutine in a `Deferred`.
d = defer.ensureDeferred(res)
elif isinstance(res, defer.Deferred):
d = res
elif isinstance(res, Awaitable):
# `res` is probably some kind of completed awaitable, such as a `DoneAwaitable`
# or `Future` from `make_awaitable`.
d = defer.ensureDeferred(_unwrap_awaitable(res))
else:
# `res` is a plain value. Wrap it in a `Deferred`.
d = defer.succeed(res)

if d.called and not d.paused:
# The function should have maintained the logcontext, so we can
# optimise out the messing about
return d

# The function may have reset the context before returning, so
# we need to restore it now.
ctx = set_current_context(current)

# The original context will be restored when the deferred
# completes, but there is nothing waiting for it, so it will
# get leaked into the reactor or some other function which
# wasn't expecting it. We therefore need to reset the context
# here.
Returns:
Deferred which returns the result of func, or `None` if func raises.
Note that the returned Deferred does not follow the synapse logcontext
rules.
"""
# Our goal is to have the caller logcontext unchanged after firing off the
# background task and returning.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, ctx)
return d
# When this function is called, the current context is stored (using
# `PreserveLoggingContext`), we kick off the background task, and we restore the
# original context before returning (also part of `PreserveLoggingContext`).
with PreserveLoggingContext(current_context()):
try:
res = f(*args, **kwargs)
except Exception:
# the assumption here is that the caller doesn't want to be disturbed
# by synchronous exceptions, so let's turn them into Failures.
return defer.fail()

# `res` may be a coroutine, `Deferred`, some other kind of awaitable, or a plain
# value. Convert it to a `Deferred`.
d: "defer.Deferred[R]"
if isinstance(res, typing.Coroutine):
# Wrap the coroutine in a `Deferred`.
d = defer.ensureDeferred(res)
elif isinstance(res, defer.Deferred):
d = res
elif isinstance(res, Awaitable):
# `res` is probably some kind of completed awaitable, such as a `DoneAwaitable`
# or `Future` from `make_awaitable`.
d = defer.ensureDeferred(_unwrap_awaitable(res))
else:
# `res` is a plain value. Wrap it in a `Deferred`.
d = defer.succeed(res)

# The deferred has already completed
if d.called and not d.paused:
# The function should have maintained the logcontext, so we can
# optimise out the messing about
return d

# The original logcontext will be restored when the deferred completes, but
# there is nothing waiting for it, so it will get leaked into the reactor (which
# would then get picked up by the next thing the reactor does). We therefore
# need to reset the logcontext here (set the `sentinel` logcontext) before
# yielding control back to the reactor.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, SENTINEL_CONTEXT)
return d


def run_coroutine_in_background(
@@ -860,52 +885,67 @@ def run_coroutine_in_background(
do not run until called, and so calling an async function without awaiting
cannot change the log contexts.
"""

current = current_context()
d = defer.ensureDeferred(coroutine)

# The function may have reset the context before returning, so
# we need to restore it now.
ctx = set_current_context(current)

# The original context will be restored when the deferred
# completes, but there is nothing waiting for it, so it will
# get leaked into the reactor or some other function which
# wasn't expecting it. We therefore need to reset the context
# here.
# Our goal is to have the caller logcontext unchanged after firing off the
# background task and returning.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, ctx)
return d
# When this function is called, the current context is stored (using
# `PreserveLoggingContext`), we kick off the background task, and we restore the
# original context before returning (also part of `PreserveLoggingContext`).
with PreserveLoggingContext(current_context()):
d = defer.ensureDeferred(coroutine)

# The original logcontext will be restored when the deferred completes, but
# there is nothing waiting for it, so it will get leaked into the reactor (which
# would then get picked up by the next thing the reactor does). We therefore
# need to reset the logcontext here (set the `sentinel` logcontext) before
# yielding control back to the reactor.
#
# (If this feels asymmetric, consider it this way: we are
# effectively forking a new thread of execution. We are
# probably currently within a ``with LoggingContext()`` block,
# which is supposed to have a single entry and exit point. But
# by spawning off another deferred, we are effectively
# adding a new exit point.)
d.addBoth(_set_context_cb, SENTINEL_CONTEXT)
return d


T = TypeVar("T")


def make_deferred_yieldable(deferred: "defer.Deferred[T]") -> "defer.Deferred[T]":
"""Given a deferred, make it follow the Synapse logcontext rules:
"""
Given a deferred, make it follow the Synapse logcontext rules:

If the deferred has completed, essentially does nothing (just returns another
completed deferred with the result/failure).
- If the deferred has completed, essentially does nothing (just returns another
completed deferred with the result/failure).
- If the deferred has not yet completed, resets the logcontext before returning an
incomplete deferred. Then, when the deferred completes, restores the current
logcontext before running callbacks/errbacks.

If the deferred has not yet completed, resets the logcontext before
returning a deferred. Then, when the deferred completes, restores the
current logcontext before running callbacks/errbacks.
This means the resultant deferred can be awaited without leaking the current
logcontext to the reactor (which would then get erroneously picked up by the next
thing the reactor does), and also means that the logcontext is preserved when the
deferred completes.

(This is more-or-less the opposite operation to run_in_background.)
(This is more-or-less the opposite operation to run_in_background in terms of how it
handles log contexts.)
"""
# The deferred has already completed
if deferred.called and not deferred.paused:
# it looks like this deferred is ready to run any callbacks we give it
# immediately. We may as well optimise out the logcontext faffery.
return deferred

# ok, we can't be sure that a yield won't block, so let's reset the
# logcontext, and add a callback to the deferred to restore it.
# Our goal is to have the caller logcontext unchanged after they yield/await the
# returned deferred.
#
# When the caller yield/await's the returned deferred, it may yield
# control back to the reactor. To avoid leaking the current logcontext to the
# reactor (which would then get erroneously picked up by the next thing the reactor
# does) while the deferred runs in the reactor event loop, we reset the logcontext
# and add a callback to the deferred to restore it so the caller's logcontext is
# active when the deferred completes.
Comment on lines +973 to +978
Contributor Author:
Is this more accurate than the previous explanation?

The "yield will block" scenario is coming from the situation described in the PR description of matrix-org/synapse#3170

So, it turns out that if you have a first Deferred D1, you can add a callback which returns another Deferred D2, and D2 must then complete before any further callbacks on D1 will execute (and later callbacks on D1 get the result of D2 rather than D2 itself).

So, D1 might have called=True (as in, it has started running its callbacks), but any new callbacks added to D1 won't get run until D2 completes - so if you yield D1 in an inlineCallbacks function, your yield will 'block'.

In conclusion: some of our assumptions in logcontext were invalid. We need to make sure that we don't optimise out the logcontext juggling when this situation happens. Fortunately, it is easy to detect by checking D1.paused.

I feel like this may be irrelevant to what I think is actually happening (as explained now) 🤔

Member:
I think that is related to the d.paused check?

Contributor Author:
I feel like that detail might not even matter compared to this other reason why we would set sentinel here.

That detail might still be good to also include here, but I have no confidence even with this scenario explanation; it feels like the "why" half is missing, which is what I would want filled in.
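
A small pure-Twisted sketch of the D1/D2 situation quoted above (no Synapse code involved), showing why checking `d.called` alone is not enough and what the `d.paused` check catches:

```python
from twisted.internet import defer

d1 = defer.succeed("first")  # d1.called is already True
d2 = defer.Deferred()        # not fired yet

# A callback that returns an unfired Deferred pauses d1: callbacks added to d1
# after this point will not run until d2 completes.
d1.addCallback(lambda _: d2)
assert d1.called and d1.paused

results = []
d1.addCallback(results.append)  # does not run yet, despite d1.called
assert results == []

d2.callback("second")
# Later callbacks on d1 receive d2's result rather than d2 itself.
assert results == ["second"]
```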

prev_context = set_current_context(SENTINEL_CONTEXT)
deferred.addBoth(_set_context_cb, prev_context)
return deferred
12 changes: 7 additions & 5 deletions synapse/metrics/background_process_metrics.py
@@ -47,7 +47,7 @@
from synapse.logging.context import (
ContextResourceUsage,
LoggingContext,
PreserveLoggingContext,
run_in_background,
)
from synapse.logging.opentracing import SynapseTags, start_active_span
from synapse.metrics import SERVER_NAME_LABEL
@@ -228,6 +228,11 @@ def run_as_background_process(
clock.looping_call and friends (or for firing-and-forgetting in the middle of a
normal synapse async function).

Because the returned Deferred does not follow the synapse logcontext rules, awaiting
the result of this function will result in the log context being cleared. In order
to properly await the result of this function and maintain the current log context,
use `make_deferred_yieldable`.

Args:
desc: a description for this background process type
server_name: The homeserver name that this background process is being run for
@@ -280,10 +285,7 @@ async def run() -> Optional[R]:
name=desc, **{SERVER_NAME_LABEL: server_name}
).dec()

with PreserveLoggingContext():
# Note that we return a Deferred here so that it can be used in a
# looping_call and other places that expect a Deferred.
return defer.ensureDeferred(run())
return run_in_background(run)


P = ParamSpec("P")