Skip to content

Commit 3931667

Browse files
Be able to shutdown homeserver that hasn't setup (#19187)
For example, a homeserver can fail to `setup` if it fails to connect to the database. Fix #19188 Follow-up to #18828 ### Background As part of Element's plan to support a light form of vhosting (virtual host) (multiple instances of Synapse in the same Python process) (c.f Synapse Pro for small hosts), we're currently diving into the details and implications of running multiple instances of Synapse in the same Python process. "Clean tenant deprovisioning" tracked internally by element-hq/synapse-small-hosts#50
1 parent f86918e commit 3931667

File tree

7 files changed

+358
-180
lines changed

7 files changed

+358
-180
lines changed

changelog.d/19187.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix `HomeServer.shutdown()` failing if the homeserver hasn't been setup yet.

synapse/api/errors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -856,6 +856,12 @@ def to_synapse_error(self) -> SynapseError:
856856
return ProxiedRequestError(self.code, errmsg, errcode, j)
857857

858858

859+
class HomeServerNotSetupException(Exception):
860+
"""
861+
Raised when an operation is attempted on the HomeServer before setup() has been called.
862+
"""
863+
864+
859865
class ShadowBanError(Exception):
860866
"""
861867
Raised when a shadow-banned user attempts to perform an action.

synapse/crypto/keyring.py

Lines changed: 92 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import abc
2323
import logging
24+
from contextlib import ExitStack
2425
from typing import TYPE_CHECKING, Callable, Iterable
2526

2627
import attr
@@ -150,57 +151,81 @@ class Keyring:
150151
"""
151152

152153
def __init__(
153-
self, hs: "HomeServer", key_fetchers: "Iterable[KeyFetcher] | None" = None
154+
self,
155+
hs: "HomeServer",
156+
test_only_key_fetchers: "list[KeyFetcher] | None" = None,
154157
):
155-
self.server_name = hs.hostname
158+
"""
159+
Args:
160+
hs: The HomeServer instance
161+
test_only_key_fetchers: Dependency injection for tests only. If provided,
162+
these key fetchers will be used instead of the default ones.
163+
"""
164+
# Clean-up to avoid partial initialization leaving behind references.
165+
with ExitStack() as exit:
166+
self.server_name = hs.hostname
167+
168+
self._key_fetchers: list[KeyFetcher] = []
169+
if test_only_key_fetchers is None:
170+
# Always fetch keys from the database.
171+
store_key_fetcher = StoreKeyFetcher(hs)
172+
exit.callback(store_key_fetcher.shutdown)
173+
self._key_fetchers.append(store_key_fetcher)
174+
175+
# Fetch keys from configured trusted key servers, if any exist.
176+
key_servers = hs.config.key.key_servers
177+
if key_servers:
178+
perspectives_key_fetcher = PerspectivesKeyFetcher(hs)
179+
exit.callback(perspectives_key_fetcher.shutdown)
180+
self._key_fetchers.append(perspectives_key_fetcher)
181+
182+
# Finally, fetch keys from the origin server directly.
183+
server_key_fetcher = ServerKeyFetcher(hs)
184+
exit.callback(server_key_fetcher.shutdown)
185+
self._key_fetchers.append(server_key_fetcher)
186+
else:
187+
self._key_fetchers = test_only_key_fetchers
188+
189+
self._fetch_keys_queue: BatchingQueue[
190+
_FetchKeyRequest, dict[str, dict[str, FetchKeyResult]]
191+
] = BatchingQueue(
192+
name="keyring_server",
193+
hs=hs,
194+
clock=hs.get_clock(),
195+
# The method called to fetch each key
196+
process_batch_callback=self._inner_fetch_key_requests,
197+
)
198+
exit.callback(self._fetch_keys_queue.shutdown)
156199

157-
if key_fetchers is None:
158-
# Always fetch keys from the database.
159-
mutable_key_fetchers: list[KeyFetcher] = [StoreKeyFetcher(hs)]
160-
# Fetch keys from configured trusted key servers, if any exist.
161-
key_servers = hs.config.key.key_servers
162-
if key_servers:
163-
mutable_key_fetchers.append(PerspectivesKeyFetcher(hs))
164-
# Finally, fetch keys from the origin server directly.
165-
mutable_key_fetchers.append(ServerKeyFetcher(hs))
166-
167-
self._key_fetchers: Iterable[KeyFetcher] = tuple(mutable_key_fetchers)
168-
else:
169-
self._key_fetchers = key_fetchers
170-
171-
self._fetch_keys_queue: BatchingQueue[
172-
_FetchKeyRequest, dict[str, dict[str, FetchKeyResult]]
173-
] = BatchingQueue(
174-
name="keyring_server",
175-
hs=hs,
176-
clock=hs.get_clock(),
177-
# The method called to fetch each key
178-
process_batch_callback=self._inner_fetch_key_requests,
179-
)
200+
self._is_mine_server_name = hs.is_mine_server_name
180201

181-
self._is_mine_server_name = hs.is_mine_server_name
202+
# build a FetchKeyResult for each of our own keys, to shortcircuit the
203+
# fetcher.
204+
self._local_verify_keys: dict[str, FetchKeyResult] = {}
205+
for key_id, key in hs.config.key.old_signing_keys.items():
206+
self._local_verify_keys[key_id] = FetchKeyResult(
207+
verify_key=key, valid_until_ts=key.expired
208+
)
182209

183-
# build a FetchKeyResult for each of our own keys, to shortcircuit the
184-
# fetcher.
185-
self._local_verify_keys: dict[str, FetchKeyResult] = {}
186-
for key_id, key in hs.config.key.old_signing_keys.items():
187-
self._local_verify_keys[key_id] = FetchKeyResult(
188-
verify_key=key, valid_until_ts=key.expired
210+
vk = get_verify_key(hs.signing_key)
211+
self._local_verify_keys[f"{vk.alg}:{vk.version}"] = FetchKeyResult(
212+
verify_key=vk,
213+
valid_until_ts=2**63, # fake future timestamp
189214
)
190215

191-
vk = get_verify_key(hs.signing_key)
192-
self._local_verify_keys[f"{vk.alg}:{vk.version}"] = FetchKeyResult(
193-
verify_key=vk,
194-
valid_until_ts=2**63, # fake future timestamp
195-
)
216+
# We reached the end of the block which means everything was successful, so
217+
# no exit handlers are needed (remove them all).
218+
exit.pop_all()
196219

197220
def shutdown(self) -> None:
198221
"""
199222
Prepares the KeyRing for garbage collection by shutting down it's queues.
200223
"""
201224
self._fetch_keys_queue.shutdown()
225+
202226
for key_fetcher in self._key_fetchers:
203227
key_fetcher.shutdown()
228+
self._key_fetchers.clear()
204229

205230
async def verify_json_for_server(
206231
self,
@@ -521,9 +546,21 @@ class StoreKeyFetcher(KeyFetcher):
521546
"""KeyFetcher impl which fetches keys from our data store"""
522547

523548
def __init__(self, hs: "HomeServer"):
524-
super().__init__(hs)
525-
526-
self.store = hs.get_datastores().main
549+
# Clean-up to avoid partial initialization leaving behind references.
550+
with ExitStack() as exit:
551+
super().__init__(hs)
552+
# `KeyFetcher` keeps a reference to `hs` which we need to clean up if
553+
# something goes wrong so we can cleanly shutdown the homeserver.
554+
exit.callback(super().shutdown)
555+
556+
# An error can be raised here if someone tried to create a `StoreKeyFetcher`
557+
# before the homeserver is fully set up (`HomeServerNotSetupException:
558+
# HomeServer.setup must be called before getting datastores`).
559+
self.store = hs.get_datastores().main
560+
561+
# We reached the end of the block which means everything was successful, so
562+
# no exit handlers are needed (remove them all).
563+
exit.pop_all()
527564

528565
async def _fetch_keys(
529566
self, keys_to_fetch: list[_FetchKeyRequest]
@@ -543,9 +580,21 @@ async def _fetch_keys(
543580

544581
class BaseV2KeyFetcher(KeyFetcher):
545582
def __init__(self, hs: "HomeServer"):
546-
super().__init__(hs)
547-
548-
self.store = hs.get_datastores().main
583+
# Clean-up to avoid partial initialization leaving behind references.
584+
with ExitStack() as exit:
585+
super().__init__(hs)
586+
# `KeyFetcher` keeps a reference to `hs` which we need to clean up if
587+
# something goes wrong so we can cleanly shutdown the homeserver.
588+
exit.callback(super().shutdown)
589+
590+
# An error can be raised here if someone tried to create a `StoreKeyFetcher`
591+
# before the homeserver is fully set up (`HomeServerNotSetupException:
592+
# HomeServer.setup must be called before getting datastores`).
593+
self.store = hs.get_datastores().main
594+
595+
# We reached the end of the block which means everything was successful, so
596+
# no exit handlers are needed (remove them all).
597+
exit.pop_all()
549598

550599
async def process_v2_response(
551600
self, from_server: str, response_json: JsonDict, time_added_ms: int

synapse/server.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
from synapse.api.auth.internal import InternalAuth
5555
from synapse.api.auth.mas import MasDelegatedAuth
5656
from synapse.api.auth_blocking import AuthBlocking
57+
from synapse.api.errors import HomeServerNotSetupException
5758
from synapse.api.filtering import Filtering
5859
from synapse.api.ratelimiting import Ratelimiter, RequestRatelimiter
5960
from synapse.app._base import unregister_sighups
@@ -399,7 +400,7 @@ def run_as_background_process(
399400
"""
400401
if self._is_shutdown:
401402
raise Exception(
402-
f"Cannot start background process. HomeServer has been shutdown {len(self._background_processes)} {len(self.get_clock()._looping_calls)} {len(self.get_clock()._call_id_to_delayed_call)}"
403+
"Cannot start background process. HomeServer has been shutdown"
403404
)
404405

405406
# Ignore linter error as this is the one location this should be called.
@@ -466,7 +467,17 @@ async def shutdown(self) -> None:
466467

467468
# TODO: Cleanup replication pieces
468469

469-
self.get_keyring().shutdown()
470+
keyring: Keyring | None = None
471+
try:
472+
keyring = self.get_keyring()
473+
except HomeServerNotSetupException:
474+
# If the homeserver wasn't fully setup, keyring won't have existed before
475+
# this and will fail to be initialized but it cleans itself up for any
476+
# partial initialization problem.
477+
pass
478+
479+
if keyring:
480+
keyring.shutdown()
470481

471482
# Cleanup metrics associated with the homeserver
472483
for later_gauge in all_later_gauges_to_clean_up_on_shutdown.values():
@@ -478,8 +489,12 @@ async def shutdown(self) -> None:
478489
self.config.server.server_name
479490
)
480491

481-
for db in self.get_datastores().databases:
482-
db.stop_background_updates()
492+
try:
493+
for db in self.get_datastores().databases:
494+
db.stop_background_updates()
495+
except HomeServerNotSetupException:
496+
# If the homeserver wasn't fully setup, the datastores won't exist
497+
pass
483498

484499
if self.should_send_federation():
485500
try:
@@ -513,8 +528,12 @@ async def shutdown(self) -> None:
513528
pass
514529
self._background_processes.clear()
515530

516-
for db in self.get_datastores().databases:
517-
db._db_pool.close()
531+
try:
532+
for db in self.get_datastores().databases:
533+
db._db_pool.close()
534+
except HomeServerNotSetupException:
535+
# If the homeserver wasn't fully setup, the datastores won't exist
536+
pass
518537

519538
def register_async_shutdown_handler(
520539
self,
@@ -677,7 +696,9 @@ def get_clock(self) -> Clock:
677696

678697
def get_datastores(self) -> Databases:
679698
if not self.datastores:
680-
raise Exception("HomeServer.setup must be called before getting datastores")
699+
raise HomeServerNotSetupException(
700+
"HomeServer.setup must be called before getting datastores"
701+
)
681702

682703
return self.datastores
683704

0 commit comments

Comments
 (0)