-
Notifications
You must be signed in to change notification settings - Fork 402
Move start_doing_background_updates()
to HomeServer.start_background_tasks()
#19057
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from 16 commits
4cf90e1
78fd66c
d1ed34a
01343d6
2fcd70d
74c5e64
c1d89d8
064b30b
5e87f89
edf9ef0
e14e5d3
19b181b
b4d1789
6a11d64
4f4bf80
53fd1b2
9e99547
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1 +1 @@ | ||||||||||||||||||||||||||||||||||||||||||||||||||
Move `start_doing_background_updates()` to `SynapseHomeServer.start_background_tasks()`. | ||||||||||||||||||||||||||||||||||||||||||||||||||
Move `start_doing_background_updates()` to `HomeServer.start_background_tasks()`. | ||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
class PresenceHandlerInitTestCase(unittest.HomeserverTestCase): | |
def default_config(self) -> JsonDict: | |
config = super().default_config() | |
# Disable background tasks on this worker so that the PresenceHandler isn't | |
# loaded until we request it. | |
config["run_background_tasks_on"] = "other" | |
return config |
Example failures:
SYNAPSE_TEST_LOG_LEVEL=INFO poetry run trial tests.handlers.test_presence.PresenceHandlerInitTestCase
The "poetry.dev-dependencies" section is deprecated and will be removed in a future version. Use "poetry.group.dev.dependencies" instead.
tests.handlers.test_presence
PresenceHandlerInitTestCase
test_restored_presence_idles ... [ERROR]
test_restored_presence_online_after_sync_0_org_matrix_msc3026_busy ... [ERROR]
test_restored_presence_online_after_sync_1_online ... [ERROR]
test_restored_presence_online_after_sync_2_unavailable ... [ERROR]
test_restored_presence_online_after_sync_3_offline ... [ERROR]
===============================================================================
[ERROR]
Traceback (most recent call last):
File "/home/eric/Documents/github/element/synapse/tests/unittest.py", line 138, in new
return code(orig, *args, **kwargs)
File "/home/eric/Documents/github/element/synapse/tests/unittest.py", line 232, in setUp
return orig()
File "/home/eric/Documents/github/element/synapse/tests/unittest.py", line 404, in setUp
self.hs = self.make_homeserver(self.reactor, self.clock)
File "/home/eric/Documents/github/element/synapse/tests/unittest.py", line 525, in make_homeserver
hs = self.setup_test_homeserver(reactor=reactor, clock=clock)
File "/home/eric/Documents/github/element/synapse/tests/unittest.py", line 733, in setup_test_homeserver
raise AssertionError(
builtins.AssertionError: Timed out waiting for background updates to complete (5s). Did you forget to `start_doing_background_updates()`?
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_idles
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_0_org_matrix_msc3026_busy
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_1_online
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_2_unavailable
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_3_offline
-------------------------------------------------------------------------------
Ran 5 tests in 25.208s
FAILED (errors=5)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Database isn't setup for tests that use GenericWorkerServer
❌
We have a few tests that use homeserver_to_use=GenericWorkerServer
and are failing with builtins.AssertionError: Timed out waiting for background updates to complete (5s). Did you forget to 'start_doing_background_updates()'?
in CI (from our new assertion).
Error:
Traceback (most recent call last):
File "/home/runner/work/synapse/synapse/tests/unittest.py", line 138, in new
return code(orig, *args, **kwargs)
File "/home/runner/work/synapse/synapse/tests/unittest.py", line 232, in setUp
return orig()
File "/home/runner/work/synapse/synapse/tests/unittest.py", line 404, in setUp
self.hs = self.make_homeserver(self.reactor, self.clock)
File "/home/runner/work/synapse/synapse/tests/app/test_openid_listener.py", line 40, in make_homeserver
hs = self.setup_test_homeserver(homeserver_to_use=GenericWorkerServer)
File "/home/runner/work/synapse/synapse/tests/unittest.py", line 733, in setup_test_homeserver
raise AssertionError(
builtins.AssertionError: Timed out waiting for background updates to complete (5s). Did you forget to `start_doing_background_updates()`?
tests.app.test_openid_listener.FederationReaderOpenIDListenerTests.test_openid_listener_0
tests.app.test_openid_listener.FederationReaderOpenIDListenerTests.test_openid_listener_1
tests.app.test_openid_listener.FederationReaderOpenIDListenerTests.test_openid_listener_2
tests.app.test_openid_listener.FederationReaderOpenIDListenerTests.test_openid_listener_3
...
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_idles
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_0_org_matrix_msc3026_busy
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_1_online
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_2_unavailable
tests.handlers.test_presence.PresenceHandlerInitTestCase.test_restored_presence_online_after_sync_3_offline
...
tests.replication.test_federation_ack.FederationAckTestCase.test_federation_ack_sent
...
tests.storage.test_rollback_worker.WorkerSchemaTests.test_not_upgraded_current_schema_version_with_outstanding_deltas
tests.storage.test_rollback_worker.WorkerSchemaTests.test_not_upgraded_old_schema_version
tests.storage.test_rollback_worker.WorkerSchemaTests.test_rolling_back
Previously, we didn't wait for background updates in these cases and only ran the database background updates for the TestHomeServer
(the default in tests). Basically turning a blind eye to this kind of problem.
Lines 680 to 696 in ec7554b
async def run_bg_updates() -> None: | |
with LoggingContext(name="run_bg_updates", server_name=server_name): | |
self.get_success(stor.db_pool.updates.run_background_updates(False)) | |
hs = setup_test_homeserver( | |
cleanup_func=self.addCleanup, | |
server_name=server_name, | |
config=config_obj, | |
reactor=reactor, | |
clock=clock, | |
**extra_homeserver_attributes, | |
) | |
stor = hs.get_datastores().main | |
# Run the database background updates, when running against "master". | |
if hs.__class__.__name__ == "TestHomeServer": | |
self.get_success(run_bg_updates()) |
Unable to run test by itself
Compounding things is that we have to deal with #15671 (previously matrix-org/synapse#15671) which prevents us from running these tests on their own so it's even harder to debug what's going on.
synapse.storage.prepare_database.UpgradeDatabaseException: Uninitialised database: run the main synapse process to prepare the database schema before starting worker processes.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Move `start_doing_background_updates()` to `HomeServer.start_background_tasks()`. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -646,6 +646,7 @@ def start_background_tasks(self) -> None: | |
self.get_task_scheduler() | ||
self.get_common_usage_metrics_manager().setup() | ||
start_phone_stats_home(self) | ||
self.get_datastores().main.db_pool.updates.start_doing_background_updates() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To call this out again; before #19036 (comment), With this change, it runs on whatever instance is configured to Based on the previous discussion, we thought this was fine. I just didn't have this in the correct spot to match that description.
MadLittleMods marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
def get_reactor(self) -> ISynapseReactor: | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,7 @@ | |
|
||
from parameterized import parameterized | ||
|
||
from twisted.internet.task import deferLater | ||
from twisted.internet.testing import MemoryReactor | ||
|
||
import synapse.rest.admin | ||
|
@@ -861,7 +861,7 @@ | |
# Mock PaginationHandler.purge_room to sleep for 100s, so we have time to do a second call | ||
# before the purge is over. Note that it doesn't purge anymore, but we don't care. | ||
async def purge_room(room_id: str, force: bool) -> None: | ||
await deferLater(self.hs.get_reactor(), 100, lambda: None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All of these They are changed here because we started seeing these failures in CI with this PR for some reason: Example failure: https://github.com/element-hq/synapse/actions/runs/18477454390/job/52645183606?pr=19057
|
||
await self.hs.get_clock().sleep(100) | ||
|
||
self.pagination_handler.purge_room = AsyncMock(side_effect=purge_room) # type: ignore[method-assign] | ||
|
||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -73,12 +73,12 @@ | |||||||||||||||||||||||||
from synapse.http.site import SynapseRequest, SynapseSite | ||||||||||||||||||||||||||
from synapse.logging.context import ( | ||||||||||||||||||||||||||
SENTINEL_CONTEXT, | ||||||||||||||||||||||||||
LoggingContext, | ||||||||||||||||||||||||||
current_context, | ||||||||||||||||||||||||||
set_current_context, | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
from synapse.rest import RegisterServletsFunc | ||||||||||||||||||||||||||
from synapse.server import HomeServer | ||||||||||||||||||||||||||
from synapse.storage.background_updates import UpdaterStatus | ||||||||||||||||||||||||||
from synapse.storage.keys import FetchKeyResult | ||||||||||||||||||||||||||
from synapse.types import ISynapseReactor, JsonDict, Requester, UserID, create_requester | ||||||||||||||||||||||||||
from synapse.util.clock import Clock | ||||||||||||||||||||||||||
|
@@ -108,6 +108,11 @@ | |||||||||||||||||||||||||
R = TypeVar("R") | ||||||||||||||||||||||||||
S = TypeVar("S") | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
BACKGROUND_UPDATE_TIMEOUT_SECONDS = 5 | ||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||
We expect this to be pretty immediate as we're working with an empty database. | ||||||||||||||||||||||||||
""" | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
class _TypedFailure(Generic[_ExcType], Protocol): | ||||||||||||||||||||||||||
"""Extension to twisted.Failure, where the 'value' has a certain type.""" | ||||||||||||||||||||||||||
|
@@ -490,7 +495,11 @@ def wait_on_thread(self, deferred: Deferred, timeout: int = 10) -> None: | |||||||||||||||||||||||||
|
||||||||||||||||||||||||||
def wait_for_background_updates(self) -> None: | ||||||||||||||||||||||||||
"""Block until all background database updates have completed.""" | ||||||||||||||||||||||||||
store = self.hs.get_datastores().main | ||||||||||||||||||||||||||
self._wait_for_background_updates(self.hs) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
def _wait_for_background_updates(self, hs: HomeServer) -> None: | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We no longer use this variant but I think it's useful to reference in the comment down below. |
||||||||||||||||||||||||||
"""Block until all background database updates have completed.""" | ||||||||||||||||||||||||||
store = hs.get_datastores().main | ||||||||||||||||||||||||||
while not self.get_success( | ||||||||||||||||||||||||||
store.db_pool.updates.has_completed_background_updates() | ||||||||||||||||||||||||||
): | ||||||||||||||||||||||||||
|
@@ -677,10 +686,6 @@ def setup_test_homeserver( | |||||||||||||||||||||||||
# construct a homeserver with a matching name. | ||||||||||||||||||||||||||
server_name = config_obj.server.server_name | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
async def run_bg_updates() -> None: | ||||||||||||||||||||||||||
with LoggingContext(name="run_bg_updates", server_name=server_name): | ||||||||||||||||||||||||||
self.get_success(stor.db_pool.updates.run_background_updates(False)) | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One difference to note here is that we're running with synapse/synapse/storage/background_updates.py Lines 526 to 527 in ec7554b
Versus now where we run this via synapse/synapse/storage/background_updates.py Lines 400 to 409 in ec7554b
|
||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
hs = setup_test_homeserver( | ||||||||||||||||||||||||||
cleanup_func=self.addCleanup, | ||||||||||||||||||||||||||
server_name=server_name, | ||||||||||||||||||||||||||
|
@@ -689,11 +694,48 @@ async def run_bg_updates() -> None: | |||||||||||||||||||||||||
clock=clock, | ||||||||||||||||||||||||||
**extra_homeserver_attributes, | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
stor = hs.get_datastores().main | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Run the database background updates, when running against "master". | ||||||||||||||||||||||||||
if hs.__class__.__name__ == "TestHomeServer": | ||||||||||||||||||||||||||
self.get_success(run_bg_updates()) | ||||||||||||||||||||||||||
# Wait for the database background updates to complete. This is important | ||||||||||||||||||||||||||
# because tests assume that the database is using the latest schema. | ||||||||||||||||||||||||||
# | ||||||||||||||||||||||||||
# We could use `self._wait_for_background_updates(hs)` to accomplish the same | ||||||||||||||||||||||||||
# thing but we don't want to start or drive the background updates here. We want | ||||||||||||||||||||||||||
# to ensure the homeserver itself is doing that. | ||||||||||||||||||||||||||
start_time_s = time.time() | ||||||||||||||||||||||||||
store = hs.get_datastores().main | ||||||||||||||||||||||||||
while not self.get_success( | ||||||||||||||||||||||||||
# This check is slightly naive. It only checks if there is anything left in | ||||||||||||||||||||||||||
# the `background_updates` database table so it is possible that the | ||||||||||||||||||||||||||
# homeserver mistakenly never registered any background updates to be run. | ||||||||||||||||||||||||||
# Since `register_background_xxx(...)` is done across the codebase, we can't | ||||||||||||||||||||||||||
# really assert that everything was registered as expected. | ||||||||||||||||||||||||||
store.db_pool.updates.has_completed_background_updates() | ||||||||||||||||||||||||||
): | ||||||||||||||||||||||||||
# Timeout if it takes too long. This should be pretty immediate as we're | ||||||||||||||||||||||||||
# working with an empty database. | ||||||||||||||||||||||||||
current_time_s = time.time() | ||||||||||||||||||||||||||
if current_time_s - start_time_s > BACKGROUND_UPDATE_TIMEOUT_SECONDS: | ||||||||||||||||||||||||||
background_update_status = store.db_pool.updates.get_status() | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
# Add some better context when we give up | ||||||||||||||||||||||||||
extra_message = "" | ||||||||||||||||||||||||||
if background_update_status == UpdaterStatus.NOT_STARTED: | ||||||||||||||||||||||||||
extra_message = ( | ||||||||||||||||||||||||||
"Did you forget to `start_doing_background_updates()`?" | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
elif background_update_status == UpdaterStatus.RUNNING_UPDATE: | ||||||||||||||||||||||||||
extra_message = "Background updates were still running when we gave up. Are they stuck?" | ||||||||||||||||||||||||||
else: | ||||||||||||||||||||||||||
extra_message = ( | ||||||||||||||||||||||||||
f"Background update status was {background_update_status}." | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
raise AssertionError( | ||||||||||||||||||||||||||
f"Timed out waiting for background updates to complete ({BACKGROUND_UPDATE_TIMEOUT_SECONDS}s). " | ||||||||||||||||||||||||||
+ extra_message | ||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
self.pump(by=0.1) | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
return hs | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated the changelog from #19036 to merge with this one.