From 0de3f917efb7267ccba690465ab3c56e523e6d55 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 13:50:59 -0700 Subject: [PATCH 01/11] Add an Admin API endpoint for listing quarantined media --- docs/admin_api/media_admin_api.md | 27 ++++++ synapse/rest/admin/media.py | 28 ++++++ .../databases/main/media_repository.py | 2 + synapse/storage/databases/main/room.py | 65 +++++++++++-- .../93/04_add_quarantined_ts_to_media.sql | 27 ++++++ tests/rest/admin/test_media.py | 95 +++++++++++++++++++ 6 files changed, 235 insertions(+), 9 deletions(-) create mode 100644 synapse/storage/schema/main/delta/93/04_add_quarantined_ts_to_media.sql diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index be72b2e3e22..3ab4dce7d19 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -73,6 +73,33 @@ Response: } ``` +## Listing all quarantined media + +This API returns a list of all quarantined media on the server. It is paginated, and can be scoped to either local or +remote media. Note that the pagination values are also scoped to whether the media is local or remote. For example, +providing values from a local result set to a request for remote media will return unexpected results. + +Request: +```http +GET /_synapse/admin/v1/media/quarantined?from=0&limit=100&kind=local +``` + +`from` and `limit` are optional parameters, and default to `0` and `100` respectively. They are the row index and number +of rows to return - they are not timestamps. + +`kind` *MUST* either be `local` or `remote`. + +The API returns a JSON body containing MXC URIs for the quarantined media, like the following: + +```json +{ + "media": [ + "mxc://localhost/xwvutsrqponmlkjihgfedcba", + "mxc://localhost/abcdefghijklmnopqrstuvwx" + ] +} +``` + # Quarantine media Quarantining media means that it is marked as inaccessible by users. It applies diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index d5346fe0d5c..e4a6b055e13 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -293,6 +293,34 @@ async def on_GET( return HTTPStatus.OK, {"local": local_mxcs, "remote": remote_mxcs} +class ListQuarantinedMedia(RestServlet): + """Lists all quarantined media on the server.""" + + PATTERNS = admin_patterns("/media/quarantined$") + + def __init__(self, hs: "HomeServer"): + self.store = hs.get_datastores().main + self.auth = hs.get_auth() + + async def on_GET( + self, request: SynapseRequest, + ) -> tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + start = parse_integer(request, "from", default=0) + limit = parse_integer(request, "limit", default=100) + local_or_remote = parse_string(request, "kind", required=True) + + if local_or_remote not in ["local", "remote"]: + raise SynapseError( + HTTPStatus.BAD_REQUEST, "Query parameter kind must be either 'local' or 'remote'." + ) + + mxcs = await self.store.get_quarantined_media_mxcs(start, limit, local_or_remote == "local") + + return HTTPStatus.OK, {"media": mxcs} + + class PurgeMediaCacheRestServlet(RestServlet): PATTERNS = admin_patterns("/purge_media_cache$") diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index 50664d63e5f..ca637cd7c51 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -61,6 +61,7 @@ class LocalMedia: url_cache: str | None last_access_ts: int quarantined_by: str | None + quarantined_ts: int | None safe_from_quarantine: bool user_id: str | None authenticated: bool | None @@ -78,6 +79,7 @@ class RemoteMedia: created_ts: int last_access_ts: int quarantined_by: str | None + quarantined_ts: int | None authenticated: bool | None sha256: str | None diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 633df077367..22759dc529b 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -21,6 +21,7 @@ # import logging +from datetime import time from enum import Enum from typing import ( TYPE_CHECKING, @@ -945,6 +946,46 @@ def get_retention_policy_for_room_txn( max_lifetime=max_lifetime, ) + async def get_quarantined_media_mxcs(self, index_start: int, index_limit: int, local: bool) -> list[str]: + """Retrieves all the quarantined media MXC URIs starting from the given position, + ordered by quarantined timestamp. + + Note that on established servers the "quarantined timestamp" may be zero due to + being introduced after the quarantine state was introduced. + + Args: + index_start: The position to start from. + index_limit: The maximum number of results to return. + local: When true, only local media will be returned. When false, only remote media will be returned. + + Returns: + The quarantined media as a list of media IDs. + """ + + def _get_quarantined_media_mxcs_txn( + txn: LoggingTransaction, + ) -> list[str]: + # We order by quarantined timestamp *and* media ID (including origin, when + # known) to ensure there's stable ordering for established servers. + if local: + sql = "SELECT '' as media_origin, media_id FROM local_media_repository ORDER BY quarantined_ts, media_id ASC LIMIT ? OFFSET ?" + else: + sql = "SELECT media_origin, media_id FROM remote_media_cache ORDER BY quarantined_ts, media_origin, media_id ASC LIMIT ? OFFSET ?" + txn.execute(sql, (index_limit, index_start)) + + mxcs = [] + + for media_origin, media_id in txn: + if local: + media_origin = self.hs.hostname + mxcs.append(f"mxc://{media_origin}/{media_id}") + + return mxcs + + return await self.db_pool.runInteraction( + "get_quarantined_media_mxcs", _get_quarantined_media_mxcs_txn, + ) + async def get_media_mxcs_in_room(self, room_id: str) -> tuple[list[str], list[str]]: """Retrieves all the local and remote media MXC URIs in a given room @@ -952,7 +993,7 @@ async def get_media_mxcs_in_room(self, room_id: str) -> tuple[list[str], list[st room_id Returns: - The local and remote media as a lists of the media IDs. + The local and remote media as lists of the media IDs. """ def _get_media_mxcs_in_room_txn( @@ -974,6 +1015,7 @@ def _get_media_mxcs_in_room_txn( "get_media_ids_in_room", _get_media_mxcs_in_room_txn ) + async def quarantine_media_ids_in_room( self, room_id: str, quarantined_by: str ) -> int: @@ -1147,6 +1189,10 @@ def _quarantine_local_media_txn( The total number of media items quarantined """ total_media_quarantined = 0 + now_ts = self.clock.time_msec() + + if quarantined_by is None: + now_ts = None # Effectively a legacy path, update any media that was explicitly named. if media_ids: @@ -1155,13 +1201,13 @@ def _quarantine_local_media_txn( ) sql = f""" UPDATE local_media_repository - SET quarantined_by = ? + SET quarantined_by = ?, quarantined_ts = ? WHERE {sql_many_clause_sql}""" if quarantined_by is not None: sql += " AND safe_from_quarantine = FALSE" - txn.execute(sql, [quarantined_by] + sql_many_clause_args) + txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args) # Note that a rowcount of -1 can be used to indicate no rows were affected. total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0 @@ -1172,13 +1218,13 @@ def _quarantine_local_media_txn( ) sql = f""" UPDATE local_media_repository - SET quarantined_by = ? + SET quarantined_by = ?, quarantined_ts = ? WHERE {sql_many_clause_sql}""" if quarantined_by is not None: sql += " AND safe_from_quarantine = FALSE" - txn.execute(sql, [quarantined_by] + sql_many_clause_args) + txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args) total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0 return total_media_quarantined @@ -1202,6 +1248,7 @@ def _quarantine_remote_media_txn( The total number of media items quarantined """ total_media_quarantined = 0 + now_ts = int(time.time()) if media: sql_in_list_clause, sql_args = make_tuple_in_list_sql_clause( @@ -1211,10 +1258,10 @@ def _quarantine_remote_media_txn( ) sql = f""" UPDATE remote_media_cache - SET quarantined_by = ? + SET quarantined_by = ?, quarantined_ts = ? WHERE {sql_in_list_clause}""" - txn.execute(sql, [quarantined_by] + sql_args) + txn.execute(sql, [quarantined_by, now_ts] + sql_args) total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0 total_media_quarantined = 0 @@ -1224,9 +1271,9 @@ def _quarantine_remote_media_txn( ) sql = f""" UPDATE remote_media_cache - SET quarantined_by = ? + SET quarantined_by = ?, quarantined_ts = ? WHERE {sql_many_clause_sql}""" - txn.execute(sql, [quarantined_by] + sql_many_clause_args) + txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args) total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0 return total_media_quarantined diff --git a/synapse/storage/schema/main/delta/93/04_add_quarantined_ts_to_media.sql b/synapse/storage/schema/main/delta/93/04_add_quarantined_ts_to_media.sql new file mode 100644 index 00000000000..18b76804ff6 --- /dev/null +++ b/synapse/storage/schema/main/delta/93/04_add_quarantined_ts_to_media.sql @@ -0,0 +1,27 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2025 Element Creations, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +-- Add a timestamp for when the sliding sync connection position was last used, +-- only updated with a small granularity. +-- +-- This should be NOT NULL, but we need to consider existing rows. In future we +-- may want to either backfill this or delete all rows with a NULL value (and +-- then make it NOT NULL). +ALTER TABLE local_media_repository ADD COLUMN quarantined_ts BIGINT; +ALTER TABLE remote_media_cache ADD COLUMN quarantined_ts BIGINT; + +UPDATE local_media_repository SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL; +UPDATE remote_media_cache SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL; + +-- Note: We *probably* should have an index on quarantined_ts, but we're going +-- to try to defer that to a future migration after seeing the performance impact. diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 8cc54cc80c2..a18c43b8b03 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -756,6 +756,101 @@ def _access_media( self.assertFalse(os.path.exists(local_path)) +class ListQuarantinedMediaTestCase(_AdminMediaTests): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.server_name = hs.hostname + + @parameterized.expand(["local", "remote"]) + def test_no_auth(self, kind: str) -> None: + """ + Try to list quarantined media without authentication. + """ + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=%s" % (kind,), + ) + + self.assertEqual(401, channel.code, msg=channel.json_body) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + @parameterized.expand(["local", "remote"]) + def test_requester_is_not_admin(self, kind: str) -> None: + """ + If the user is not a server admin, an error is returned. + """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=%s" % (kind,), + access_token=self.other_user_token, + ) + + self.assertEqual(403, channel.code, msg=channel.json_body) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_list_quarantined_media(self) -> None: + """ + Ensure we actually get results for each page. We can't really test that + remote media is quarantined, but we can test that local media is. + """ + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + self.media_id_1 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) + self.media_id_2 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) + self.media_id_3 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) + + def _quarantine(media_id: str) -> None: + channel = self.make_request( + "POST", + "/_synapse/admin/v1/media/quarantine/%s/%s" % (self.server_name,media_id,), + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + + _quarantine(self.media_id_1) + _quarantine(self.media_id_2) + _quarantine(self.media_id_3) + + # Page 1 + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=local&from=0&limit=1", + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, len(channel.json_body["media"])) + self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_1,), channel.json_body["media"][0]) + + # Page 2 + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=local&from=1&limit=1", + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, len(channel.json_body["media"])) + self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_2,), channel.json_body["media"][0]) + + # Page 3 + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=local&from=2&limit=1", + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, len(channel.json_body["media"])) + self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_3,), channel.json_body["media"][0]) + + # Page 4 (no media) + channel = self.make_request( + "GET", + "/_synapse/admin/v1/media/quarantined?kind=local&from=3&limit=1", + ) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, len(channel.json_body["media"])) + + class QuarantineMediaByIDTestCase(_AdminMediaTests): def upload_media_and_return_media_id(self, data: bytes) -> str: # Upload some media into the room From 2729ace1126a65e6de24091f6738396d828206c6 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 13:53:06 -0700 Subject: [PATCH 02/11] changelog --- changelog.d/19268.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/19268.feature diff --git a/changelog.d/19268.feature b/changelog.d/19268.feature new file mode 100644 index 00000000000..cb7035fee2a --- /dev/null +++ b/changelog.d/19268.feature @@ -0,0 +1 @@ +Add an admin API for retrieving a paginated list of quarantined media. \ No newline at end of file From e319d41ddf7ea6cd8ab961b1fdc54f3260ee642e Mon Sep 17 00:00:00 2001 From: turt2live <1190097+turt2live@users.noreply.github.com> Date: Tue, 2 Dec 2025 20:55:15 +0000 Subject: [PATCH 03/11] Attempt to fix linting --- synapse/rest/admin/media.py | 10 ++++-- synapse/storage/databases/main/room.py | 8 +++-- tests/rest/admin/test_media.py | 45 ++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 13 deletions(-) diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index e4a6b055e13..22f7d2d0c2d 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -303,7 +303,8 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() async def on_GET( - self, request: SynapseRequest, + self, + request: SynapseRequest, ) -> tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) @@ -313,10 +314,13 @@ async def on_GET( if local_or_remote not in ["local", "remote"]: raise SynapseError( - HTTPStatus.BAD_REQUEST, "Query parameter kind must be either 'local' or 'remote'." + HTTPStatus.BAD_REQUEST, + "Query parameter kind must be either 'local' or 'remote'.", ) - mxcs = await self.store.get_quarantined_media_mxcs(start, limit, local_or_remote == "local") + mxcs = await self.store.get_quarantined_media_mxcs( + start, limit, local_or_remote == "local" + ) return HTTPStatus.OK, {"media": mxcs} diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 22759dc529b..1fa73796c1f 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -946,7 +946,9 @@ def get_retention_policy_for_room_txn( max_lifetime=max_lifetime, ) - async def get_quarantined_media_mxcs(self, index_start: int, index_limit: int, local: bool) -> list[str]: + async def get_quarantined_media_mxcs( + self, index_start: int, index_limit: int, local: bool + ) -> list[str]: """Retrieves all the quarantined media MXC URIs starting from the given position, ordered by quarantined timestamp. @@ -983,7 +985,8 @@ def _get_quarantined_media_mxcs_txn( return mxcs return await self.db_pool.runInteraction( - "get_quarantined_media_mxcs", _get_quarantined_media_mxcs_txn, + "get_quarantined_media_mxcs", + _get_quarantined_media_mxcs_txn, ) async def get_media_mxcs_in_room(self, room_id: str) -> tuple[list[str], list[str]]: @@ -1015,7 +1018,6 @@ def _get_media_mxcs_in_room_txn( "get_media_ids_in_room", _get_media_mxcs_in_room_txn ) - async def quarantine_media_ids_in_room( self, room_id: str, quarantined_by: str ) -> int: diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index a18c43b8b03..21748841bb4 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -799,14 +799,24 @@ def test_list_quarantined_media(self) -> None: """ self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.login("admin", "pass") - self.media_id_1 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) - self.media_id_2 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) - self.media_id_3 = self.helper.upload_media(SMALL_PNG, tok=self.admin_user_tok, expect_code=200) + self.media_id_1 = self.helper.upload_media( + SMALL_PNG, tok=self.admin_user_tok, expect_code=200 + ) + self.media_id_2 = self.helper.upload_media( + SMALL_PNG, tok=self.admin_user_tok, expect_code=200 + ) + self.media_id_3 = self.helper.upload_media( + SMALL_PNG, tok=self.admin_user_tok, expect_code=200 + ) def _quarantine(media_id: str) -> None: channel = self.make_request( "POST", - "/_synapse/admin/v1/media/quarantine/%s/%s" % (self.server_name,media_id,), + "/_synapse/admin/v1/media/quarantine/%s/%s" + % ( + self.server_name, + media_id, + ), access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) @@ -822,7 +832,14 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_1,), channel.json_body["media"][0]) + self.assertEqual( + "mxc://%s/%s" + % ( + self.server_name, + self.media_id_1, + ), + channel.json_body["media"][0], + ) # Page 2 channel = self.make_request( @@ -831,7 +848,14 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_2,), channel.json_body["media"][0]) + self.assertEqual( + "mxc://%s/%s" + % ( + self.server_name, + self.media_id_2, + ), + channel.json_body["media"][0], + ) # Page 3 channel = self.make_request( @@ -840,7 +864,14 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual("mxc://%s/%s" % (self.server_name, self.media_id_3,), channel.json_body["media"][0]) + self.assertEqual( + "mxc://%s/%s" + % ( + self.server_name, + self.media_id_3, + ), + channel.json_body["media"][0], + ) # Page 4 (no media) channel = self.make_request( From 1dfe742a504aa7107874cc9fe08911cd1e63c6a0 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 14:02:32 -0700 Subject: [PATCH 04/11] Fix types of media IDs in tests --- tests/rest/admin/test_media.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 21748841bb4..3500b8a7673 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -799,15 +799,15 @@ def test_list_quarantined_media(self) -> None: """ self.admin_user = self.register_user("admin", "pass", admin=True) self.admin_user_tok = self.login("admin", "pass") - self.media_id_1 = self.helper.upload_media( - SMALL_PNG, tok=self.admin_user_tok, expect_code=200 - ) - self.media_id_2 = self.helper.upload_media( - SMALL_PNG, tok=self.admin_user_tok, expect_code=200 - ) - self.media_id_3 = self.helper.upload_media( - SMALL_PNG, tok=self.admin_user_tok, expect_code=200 - ) + + def _upload() -> str: + return self.helper.upload_media( + SMALL_PNG, tok=self.admin_user_tok, expect_code=200 + )[6:].split("/")[1] # Cut off 'mxc://' and domain + + self.media_id_1 = _upload() + self.media_id_2 = _upload() + self.media_id_3 = _upload() def _quarantine(media_id: str) -> None: channel = self.make_request( From b7bd54668852a5a74d00e88f8ffd375e244c56b3 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 14:06:39 -0700 Subject: [PATCH 05/11] it helps to slice the right thing --- tests/rest/admin/test_media.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 3500b8a7673..ce913bf29fb 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -803,7 +803,7 @@ def test_list_quarantined_media(self) -> None: def _upload() -> str: return self.helper.upload_media( SMALL_PNG, tok=self.admin_user_tok, expect_code=200 - )[6:].split("/")[1] # Cut off 'mxc://' and domain + )["content_uri"][6:].split("/")[1] # Cut off 'mxc://' and domain self.media_id_1 = _upload() self.media_id_2 = _upload() From d1a390cf636389ac71a493f10989da5e39ab5e30 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 14:13:04 -0700 Subject: [PATCH 06/11] Fix a bunch of type issues --- synapse/media/media_repository.py | 2 ++ synapse/storage/databases/main/media_repository.py | 8 +++++++- synapse/storage/databases/main/room.py | 11 +++++++---- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/synapse/media/media_repository.py b/synapse/media/media_repository.py index 29c5e66ec49..e84e8423002 100644 --- a/synapse/media/media_repository.py +++ b/synapse/media/media_repository.py @@ -914,6 +914,7 @@ async def _download_remote_file( filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, + quarantined_ts=None, authenticated=authenticated, sha256=sha256writer.hexdigest(), ) @@ -1047,6 +1048,7 @@ async def _federation_download_remote_file( filesystem_id=file_id, last_access_ts=time_now_ms, quarantined_by=None, + quarantined_ts=None, authenticated=authenticated, sha256=sha256writer.hexdigest(), ) diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index ca637cd7c51..c27c68fbc28 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -245,6 +245,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None: "user_id", "authenticated", "sha256", + "quarantined_ts", ), allow_none=True, desc="get_local_media", @@ -264,6 +265,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None: user_id=row[8], authenticated=row[9], sha256=row[10], + quarantined_ts=row[11], ) async def get_local_media_by_user_paginate( @@ -321,7 +323,8 @@ def get_local_media_by_user_paginate_txn( safe_from_quarantine, user_id, authenticated, - sha256 + sha256, + quarantined_ts FROM local_media_repository WHERE user_id = ? ORDER BY {order_by_column} {order}, media_id ASC @@ -347,6 +350,7 @@ def get_local_media_by_user_paginate_txn( user_id=row[9], authenticated=row[10], sha256=row[11], + quarantined_ts=row[12], ) for row in txn ] @@ -697,6 +701,7 @@ async def get_cached_remote_media( "quarantined_by", "authenticated", "sha256", + "quarantined_ts", ), allow_none=True, desc="get_cached_remote_media", @@ -715,6 +720,7 @@ async def get_cached_remote_media( quarantined_by=row[6], authenticated=row[7], sha256=row[8], + quarantined_ts=row[9], ) async def store_cached_remote_media( diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index 1fa73796c1f..d83ee7a33bc 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -970,9 +970,9 @@ def _get_quarantined_media_mxcs_txn( # We order by quarantined timestamp *and* media ID (including origin, when # known) to ensure there's stable ordering for established servers. if local: - sql = "SELECT '' as media_origin, media_id FROM local_media_repository ORDER BY quarantined_ts, media_id ASC LIMIT ? OFFSET ?" + sql = "SELECT '' as media_origin, media_id FROM local_media_repository WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_id ASC LIMIT ? OFFSET ?" else: - sql = "SELECT media_origin, media_id FROM remote_media_cache ORDER BY quarantined_ts, media_origin, media_id ASC LIMIT ? OFFSET ?" + sql = "SELECT media_origin, media_id FROM remote_media_cache WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_origin, media_id ASC LIMIT ? OFFSET ?" txn.execute(sql, (index_limit, index_start)) mxcs = [] @@ -1191,7 +1191,7 @@ def _quarantine_local_media_txn( The total number of media items quarantined """ total_media_quarantined = 0 - now_ts = self.clock.time_msec() + now_ts: int | None = self.clock.time_msec() if quarantined_by is None: now_ts = None @@ -1250,7 +1250,10 @@ def _quarantine_remote_media_txn( The total number of media items quarantined """ total_media_quarantined = 0 - now_ts = int(time.time()) + now_ts: int | None = self.clock.time_msec() + + if quarantined_by is None: + now_ts = None if media: sql_in_list_clause, sql_args = make_tuple_in_list_sql_clause( From e1aaea43dee38cb9f774257a4b86150d0440ed00 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 14:15:41 -0700 Subject: [PATCH 07/11] remove import that wasn't supposed to be there --- synapse/storage/databases/main/room.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index d83ee7a33bc..db9fd5f3c4f 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -21,7 +21,6 @@ # import logging -from datetime import time from enum import Enum from typing import ( TYPE_CHECKING, From 60ade15dda095570261b837b6a238ca31bfa5396 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Tue, 2 Dec 2025 14:28:30 -0700 Subject: [PATCH 08/11] register the servlet --- synapse/rest/admin/media.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 22f7d2d0c2d..5bb02439383 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -564,6 +564,7 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer) ProtectMediaByID(hs).register(http_server) UnprotectMediaByID(hs).register(http_server) ListMediaInRoom(hs).register(http_server) + ListQuarantinedMedia(hs).register(http_server) # XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as # their URL routes overlap. DeleteMediaByDateSize(hs).register(http_server) From b456a8bec772319e0cb992e547a54205cfa439a3 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 3 Dec 2025 10:21:19 -0700 Subject: [PATCH 09/11] access tokens are important --- tests/rest/admin/test_media.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index ce913bf29fb..555570ca82c 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -829,6 +829,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=0&limit=1", + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -845,6 +846,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=1&limit=1", + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -861,6 +863,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=2&limit=1", + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -877,6 +880,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=3&limit=1", + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(0, len(channel.json_body["media"])) From 4ad03ab76edc6156f55a00e949c0fa8a4526120e Mon Sep 17 00:00:00 2001 From: turt2live <1190097+turt2live@users.noreply.github.com> Date: Wed, 3 Dec 2025 17:25:53 +0000 Subject: [PATCH 10/11] Attempt to fix linting --- tests/rest/admin/test_media.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 555570ca82c..3fd2a7be7c7 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -829,7 +829,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=0&limit=1", - access_token=self.admin_user_tok, + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -846,7 +846,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=1&limit=1", - access_token=self.admin_user_tok, + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -863,7 +863,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=2&limit=1", - access_token=self.admin_user_tok, + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) @@ -880,7 +880,7 @@ def _quarantine(media_id: str) -> None: channel = self.make_request( "GET", "/_synapse/admin/v1/media/quarantined?kind=local&from=3&limit=1", - access_token=self.admin_user_tok, + access_token=self.admin_user_tok, ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(0, len(channel.json_body["media"])) From 631bb5d2b7e71edaaf57e60c5b18c6e1d60ae702 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 3 Dec 2025 10:40:19 -0700 Subject: [PATCH 11/11] Just assert that there's media, rather than the specific media The test runs too fast to really quarantine in order --- tests/rest/admin/test_media.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 555570ca82c..8a80aba659f 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -833,14 +833,6 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual( - "mxc://%s/%s" - % ( - self.server_name, - self.media_id_1, - ), - channel.json_body["media"][0], - ) # Page 2 channel = self.make_request( @@ -850,14 +842,6 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual( - "mxc://%s/%s" - % ( - self.server_name, - self.media_id_2, - ), - channel.json_body["media"][0], - ) # Page 3 channel = self.make_request( @@ -867,14 +851,6 @@ def _quarantine(media_id: str) -> None: ) self.assertEqual(200, channel.code, msg=channel.json_body) self.assertEqual(1, len(channel.json_body["media"])) - self.assertEqual( - "mxc://%s/%s" - % ( - self.server_name, - self.media_id_3, - ), - channel.json_body["media"][0], - ) # Page 4 (no media) channel = self.make_request(