Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
ea6c587
Remove sentinel context from `start_reactor` (`daemonize_process`/`run`)
MadLittleMods Aug 26, 2025
bf21b4a
Simplify stop/start context pattern in `daemonize_process`
MadLittleMods Aug 27, 2025
a677d9b
Remove sentinel context from `atexit` daemonize logs
MadLittleMods Aug 26, 2025
fce59b2
DEBUG: Logging contexts getting lost
MadLittleMods Aug 27, 2025
af1944b
Revert "DEBUG: Logging contexts getting lost"
MadLittleMods Aug 27, 2025
675d94a
Fix `run_as_background_process` not following Synapse log context rules
MadLittleMods Aug 27, 2025
1bf7049
Merge branch 'develop' into madlittlemods/remove-sentinel-context
MadLittleMods Aug 28, 2025
7938e8c
DEBUG: lost `main` context after request
MadLittleMods Aug 28, 2025
0a865fd
Revert "Fix `run_as_background_process` not following Synapse log con…
MadLittleMods Sep 4, 2025
b97b2dd
Debug: Eliminate variables
MadLittleMods Sep 5, 2025
301a714
DEBUG: test
MadLittleMods Sep 6, 2025
c1996b6
WIP: Pair with @erikjohnston
MadLittleMods Sep 8, 2025
d8f68d0
Explain the `sentinel` context better
MadLittleMods Sep 8, 2025
71cd3c4
Clarify when to use the `sentinel` context
MadLittleMods Sep 8, 2025
0e990e6
Explain logging context in `run_as_background_process`
MadLittleMods Sep 8, 2025
6a98096
Add potential future simplication of `run_as_background_process`
MadLittleMods Sep 8, 2025
ea5a841
Add log context explanation to `run_in_background`
MadLittleMods Sep 8, 2025
4b2e88f
Merge branch 'madlittlemods/remove-sentinel-context' of github.com:el…
MadLittleMods Sep 8, 2025
1f4b391
Revert "Add log context explanation to `run_in_background`"
MadLittleMods Sep 9, 2025
bbe1ee7
Revert "Add potential future simplication of `run_as_background_proce…
MadLittleMods Sep 9, 2025
b5ec2da
Revert "Explain logging context in `run_as_background_process`"
MadLittleMods Sep 9, 2025
4aa0aa0
Revert "Clarify when to use the `sentinel` context"
MadLittleMods Sep 9, 2025
37a388c
Revert "Explain the `sentinel` context better"
MadLittleMods Sep 9, 2025
4b61570
Revert "DEBUG: test"
MadLittleMods Sep 9, 2025
ae8055a
Revert "Debug: Eliminate variables"
MadLittleMods Sep 9, 2025
5c05a0b
Revert "DEBUG: lost `main` context after request"
MadLittleMods Sep 9, 2025
b8c0857
Re-establish logcontext at `start`
MadLittleMods Sep 9, 2025
74ab47f
Add context for "Shutting down..." log
MadLittleMods Sep 9, 2025
e741daf
Simplify `atexit` logcontext
MadLittleMods Sep 9, 2025
fab546d
Add logcontext to database profiling loop
MadLittleMods Sep 9, 2025
e626f09
Add changelog
MadLittleMods Sep 9, 2025
dcf5fca
Merge branch 'develop' into madlittlemods/remove-sentinel-context
MadLittleMods Sep 10, 2025
6bbd0bf
Revise changelog
MadLittleMods Sep 10, 2025
9c74f69
Fix lints
MadLittleMods Sep 10, 2025
e6685cc
Remove db profiling `looping_call` change
MadLittleMods Sep 10, 2025
1f95a3b
Prefer `PreserveLoggingContext()` defaults
MadLittleMods Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,75 +18,75 @@
# [This file includes modifications made by New Vector Limited]
#
#
import atexit
import gc
import logging
import os
import signal
import socket
import sys
import traceback
import warnings
from textwrap import indent
from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Callable,
Dict,
List,
NoReturn,
Optional,
Tuple,
cast,
)

from cryptography.utils import CryptographyDeprecationWarning
from typing_extensions import ParamSpec

import twisted
from twisted.internet import defer, error, reactor as _reactor
from twisted.internet.interfaces import (
IOpenSSLContextFactory,
IReactorSSL,
IReactorTCP,
IReactorUNIX,
)
from twisted.internet.protocol import ServerFactory
from twisted.internet.tcp import Port
from twisted.logger import LoggingFile, LogLevel
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.python.threadpool import ThreadPool
from twisted.web.resource import Resource

import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.app import check_bind_error
from synapse.app.phone_stats_home import start_phone_stats_home
from synapse.config import ConfigError
from synapse.config._base import format_config_error
from synapse.config.homeserver import HomeServerConfig
from synapse.config.server import ListenerConfig, ManholeConfig, TCPListenerConfig
from synapse.crypto import context_factory
from synapse.events.auto_accept_invites import InviteAutoAccepter
from synapse.events.presence_router import load_legacy_presence_router
from synapse.handlers.auth import load_legacy_password_auth_providers
from synapse.http.site import SynapseSite
from synapse.logging.context import PreserveLoggingContext
from synapse.logging.opentracing import init_tracer
from synapse.metrics import install_gc_manager, register_threadpool
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.metrics.jemalloc import setup_jemalloc_stats
from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
from synapse.module_api.callbacks.third_party_event_rules_callbacks import (
load_legacy_third_party_event_rules,
)
from synapse.logging.context import PreserveLoggingContext
from synapse.types import ISynapseReactor, StrCollection
from synapse.util import SYNAPSE_VERSION
from synapse.util.caches.lrucache import setup_expire_lru_cache_entries
from synapse.util.daemonize import daemonize_process
from synapse.util.gai_resolver import GAIResolver
from synapse.util.rlimit import change_resource_limit

Check failure on line 89 in synapse/app/_base.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (I001)

synapse/app/_base.py:21:1: I001 Import block is un-sorted or un-formatted

if TYPE_CHECKING:
from synapse.server import HomeServer
Expand Down Expand Up @@ -183,25 +183,23 @@
if gc_thresholds:
gc.set_threshold(*gc_thresholds)
install_gc_manager()
run_command()

# make sure that we run the reactor with the sentinel log context,
# otherwise other PreserveLoggingContext instances will get confused
# and complain when they see the logcontext arbitrarily swapping
# between the sentinel and `run` logcontexts.
Comment on lines -188 to -191
Copy link
Contributor Author

@MadLittleMods MadLittleMods Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment was added in 067b00d#diff-6e21d6a61b2f6b6f2d4ce961991ba7f27e83605f927eaa4b19d2c46a975a96c1R460-R463 (matrix-org/synapse#2027)

I can't tell exactly what's it's referring to but the one spot I was seeing Expected logging context sentinel but found main around run_as_background_process(...) usage has been fixed (#18870 (comment)).

Reproduction:

  1. poetry run synapse_homeserver --config-path homeserver.yaml
  2. Ctrl + C to stop the server
  3. Notice LoggingContext: Expected logging context main was lost in the logs

#
# We also need to drop the logcontext before forking if we're daemonizing,
# otherwise the cputime metrics get confused about the per-thread resource usage
# appearing to go backwards.
Comment on lines -193 to -195
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment was added in matrix-org/synapse#5609 and is accurately describing a real problem.

But we can be way more precise about what do to here. We only need to stop the current log context right before forking the process and start it again right after. This can be accomplished with the PreserveLoggingContext being moved strictly around the os.fork() call (search for os.fork() below).


Previously, this caused the sentinel context to be used for a whole bunch of logs (basically anything outside of a request). We now get the main context used 💪

with PreserveLoggingContext():
if daemonize:
assert pid_file is not None
# Reset the logging context when we start the reactor (whenever we yield control
# to the reactor, the `sentinel` logging context needs to be set so we don't
# leak the current logging context and erroneously apply it to the next task the
# reactor event loop picks up)
with PreserveLoggingContext():
run_command()

if daemonize:
assert pid_file is not None

if print_pidfile:
print(pid_file)

if print_pidfile:
print(pid_file)
daemonize_process(pid_file, logger)

daemonize_process(pid_file, logger)
run()
run()


def quit_with_error(error_string: str) -> NoReturn:
Expand Down Expand Up @@ -242,6 +240,7 @@
print("Redirected stdout/stderr to logs")


# TODO: Re-establish log context at this point
def register_start(
cb: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs
) -> None:
Expand Down
1 change: 1 addition & 0 deletions synapse/app/homeserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@ def main() -> None:
redirect_stdio_to_logs()

run(hs)
logger.info("asdf outside main")


if __name__ == "__main__":
Expand Down
31 changes: 22 additions & 9 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,65 +22,66 @@

"""Contains handlers for federation events."""

import enum
import itertools
import logging
from enum import Enum
from http import HTTPStatus
from typing import (
TYPE_CHECKING,
AbstractSet,
Dict,
Iterable,
List,
Optional,
Set,
Tuple,
Union,
)

from twisted.internet import defer
import attr
from prometheus_client import Histogram
from signedjson.key import decode_verify_key_bytes
from signedjson.sign import verify_signed_json
from unpaddedbase64 import decode_base64

from synapse import event_auth
from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
from synapse.api.errors import (
AuthError,
CodeMessageException,
Codes,
FederationDeniedError,
FederationError,
FederationPullAttemptBackoffError,
HttpResponseException,
NotFoundError,
PartialStateConflictError,
RequestSendFailed,
SynapseError,
)
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
from synapse.crypto.event_signing import compute_event_signature
from synapse.event_auth import validate_event_for_room_version
from synapse.events import EventBase
from synapse.events.snapshot import EventContext, UnpersistedEventContextBase
from synapse.events.validator import EventValidator
from synapse.federation.federation_client import InvalidResponseError
from synapse.handlers.pagination import PURGE_PAGINATION_LOCK_NAME
from synapse.http.servlet import assert_params_in_dict
from synapse.logging.context import nested_logging_context
from synapse.logging.context import nested_logging_context, make_deferred_yieldable
from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
from synapse.metrics import SERVER_NAME_LABEL
from synapse.metrics.background_process_metrics import run_as_background_process
from synapse.module_api import NOT_SPAM
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
from synapse.storage.invite_rule import InviteRule
from synapse.types import JsonDict, StrCollection, get_domain_from_id
from synapse.types.state import StateFilter
from synapse.util.async_helpers import Linearizer
from synapse.util.retryutils import NotRetryingDestination
from synapse.visibility import filter_events_for_server

Check failure on line 84 in synapse/handlers/federation.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (I001)

synapse/handlers/federation.py:25:1: I001 Import block is un-sorted or un-formatted

if TYPE_CHECKING:
from synapse.server import HomeServer
Expand Down Expand Up @@ -188,7 +189,8 @@
# were shut down.
if not hs.config.worker.worker_app:
run_as_background_process(
"resume_sync_partial_state_room",
# "resume_sync_partial_state_room",
"rspsr",
self.server_name,
self._resume_partial_state_room_sync,
)
Expand Down Expand Up @@ -1788,15 +1790,26 @@

async def _resume_partial_state_room_sync(self) -> None:
"""Resumes resyncing of all partial-state rooms after a restart."""
logger.info("asdf->_resume_partial_state_room_sync")
assert not self.config.worker.worker_app

partial_state_rooms = await self.store.get_partial_state_room_resync_info()
for room_id, resync_info in partial_state_rooms.items():
self._start_partial_state_room_sync(
initial_destination=resync_info.joined_via,
other_destinations=resync_info.servers_in_room,
room_id=room_id,
)
# a function which returns an incomplete deferred, but doesn't follow
# the synapse rules.
def blocking_function() -> defer.Deferred:
d: defer.Deferred = defer.Deferred()
self.hs.get_reactor().callLater(0, d.callback, None)
return d

await make_deferred_yieldable(blocking_function())

# partial_state_rooms = await self.store.get_partial_state_room_resync_info()
# for room_id, resync_info in partial_state_rooms.items():
# self._start_partial_state_room_sync(
# initial_destination=resync_info.joined_via,
# other_destinations=resync_info.servers_in_room,
# room_id=room_id,
# )
logger.info("asdf->_resume_partial_state_room_sync done")

def _start_partial_state_room_sync(
self,
Expand Down
7 changes: 7 additions & 0 deletions synapse/http/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def get_authenticated_entity(self) -> Tuple[Optional[str], Optional[str]]:
return None, None

def render(self, resrc: Resource) -> None:
logger.info("asdf->SynapseRequest.render")
# this is called once a Resource has been found to serve the request; in our
# case the Resource in question will normally be a JsonResource.

Expand All @@ -319,6 +320,11 @@ def render(self, resrc: Resource) -> None:
user_agent=get_request_user_agent(self),
),
)
logger.info(
"asdf request logcontext=%s logcontext.previous_context=%s",
self.logcontext,
self.logcontext.previous_context,
)

# override the Server header which is set by twisted
self.setHeader("Server", self.synapse_site.server_version_string)
Expand All @@ -342,6 +348,7 @@ def render(self, resrc: Resource) -> None:
servlet=self.request_metrics.name,
**{SERVER_NAME_LABEL: self.our_server_name},
).inc()
logger.info("asdf->SynapseRequest.render done")

@contextlib.contextmanager
def processing(self) -> Generator[None, None, None]:
Expand Down
Loading
Loading