Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19268.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add an admin API for retrieving a paginated list of quarantined media.
27 changes: 27 additions & 0 deletions docs/admin_api/media_admin_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,33 @@ Response:
}
```

## Listing all quarantined media

This API returns a list of all quarantined media on the server. It is paginated, and each request is scoped to either
local or remote media. Note that the pagination values are also scoped to whether the media is local or remote. For
example, providing values from a local result set to a request for remote media will return unexpected results.

Request:
```http
GET /_synapse/admin/v1/media/quarantined?from=0&limit=100&kind=local
```

`from` and `limit` are optional parameters, and default to `0` and `100` respectively. They are the row index and number
of rows to return - they are not timestamps.

`kind` is a required parameter and *MUST* be either `local` or `remote`.

The API returns a JSON body containing MXC URIs for the quarantined media, like the following:

```json
{
"media": [
"mxc://localhost/xwvutsrqponmlkjihgfedcba",
"mxc://localhost/abcdefghijklmnopqrstuvwx"
]
}
```

# Quarantine media

Quarantining media means that it is marked as inaccessible by users. It applies
Expand Down
2 changes: 2 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,6 +914,7 @@ async def _download_remote_file(
filesystem_id=file_id,
last_access_ts=time_now_ms,
quarantined_by=None,
quarantined_ts=None,
authenticated=authenticated,
sha256=sha256writer.hexdigest(),
)
Expand Down Expand Up @@ -1047,6 +1048,7 @@ async def _federation_download_remote_file(
filesystem_id=file_id,
last_access_ts=time_now_ms,
quarantined_by=None,
quarantined_ts=None,
authenticated=authenticated,
sha256=sha256writer.hexdigest(),
)
Expand Down
33 changes: 33 additions & 0 deletions synapse/rest/admin/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,38 @@ async def on_GET(
return HTTPStatus.OK, {"local": local_mxcs, "remote": remote_mxcs}


class ListQuarantinedMedia(RestServlet):
    """Admin servlet returning one page of quarantined media as MXC URIs.

    Query parameters:
        from: Row offset to start at (defaults to 0).
        limit: Maximum number of rows to return (defaults to 100).
        kind: Required; either "local" or "remote".
    """

    PATTERNS = admin_patterns("/media/quarantined$")

    def __init__(self, hs: "HomeServer"):
        self.store = hs.get_datastores().main
        self.auth = hs.get_auth()

    async def on_GET(
        self,
        request: SynapseRequest,
    ) -> tuple[int, JsonDict]:
        # The requester is checked before any query parameter is looked at.
        await assert_requester_is_admin(self.auth, request)

        offset = parse_integer(request, "from", default=0)
        page_size = parse_integer(request, "limit", default=100)
        kind = parse_string(request, "kind", required=True)

        if kind != "local" and kind != "remote":
            raise SynapseError(
                HTTPStatus.BAD_REQUEST,
                "Query parameter kind must be either 'local' or 'remote'.",
            )

        # The store takes a boolean rather than the raw string: True selects
        # local media, False selects remote media.
        want_local = kind == "local"
        quarantined = await self.store.get_quarantined_media_mxcs(
            offset, page_size, want_local
        )

        return HTTPStatus.OK, {"media": quarantined}


class PurgeMediaCacheRestServlet(RestServlet):
PATTERNS = admin_patterns("/purge_media_cache$")

Expand Down Expand Up @@ -532,6 +564,7 @@ def register_servlets_for_media_repo(hs: "HomeServer", http_server: HttpServer)
ProtectMediaByID(hs).register(http_server)
UnprotectMediaByID(hs).register(http_server)
ListMediaInRoom(hs).register(http_server)
ListQuarantinedMedia(hs).register(http_server)
# XXX DeleteMediaByDateSize must be registered before DeleteMediaByID as
# their URL routes overlap.
DeleteMediaByDateSize(hs).register(http_server)
Expand Down
10 changes: 9 additions & 1 deletion synapse/storage/databases/main/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class LocalMedia:
url_cache: str | None
last_access_ts: int
quarantined_by: str | None
quarantined_ts: int | None
safe_from_quarantine: bool
user_id: str | None
authenticated: bool | None
Expand All @@ -78,6 +79,7 @@ class RemoteMedia:
created_ts: int
last_access_ts: int
quarantined_by: str | None
quarantined_ts: int | None
authenticated: bool | None
sha256: str | None

Expand Down Expand Up @@ -243,6 +245,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None:
"user_id",
"authenticated",
"sha256",
"quarantined_ts",
),
allow_none=True,
desc="get_local_media",
Expand All @@ -262,6 +265,7 @@ async def get_local_media(self, media_id: str) -> LocalMedia | None:
user_id=row[8],
authenticated=row[9],
sha256=row[10],
quarantined_ts=row[11],
)

async def get_local_media_by_user_paginate(
Expand Down Expand Up @@ -319,7 +323,8 @@ def get_local_media_by_user_paginate_txn(
safe_from_quarantine,
user_id,
authenticated,
sha256
sha256,
quarantined_ts
FROM local_media_repository
WHERE user_id = ?
ORDER BY {order_by_column} {order}, media_id ASC
Expand All @@ -345,6 +350,7 @@ def get_local_media_by_user_paginate_txn(
user_id=row[9],
authenticated=row[10],
sha256=row[11],
quarantined_ts=row[12],
)
for row in txn
]
Expand Down Expand Up @@ -695,6 +701,7 @@ async def get_cached_remote_media(
"quarantined_by",
"authenticated",
"sha256",
"quarantined_ts",
),
allow_none=True,
desc="get_cached_remote_media",
Expand All @@ -713,6 +720,7 @@ async def get_cached_remote_media(
quarantined_by=row[6],
authenticated=row[7],
sha256=row[8],
quarantined_ts=row[9],
)

async def store_cached_remote_media(
Expand Down
69 changes: 60 additions & 9 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,14 +945,57 @@ def get_retention_policy_for_room_txn(
max_lifetime=max_lifetime,
)

async def get_quarantined_media_mxcs(
    self, index_start: int, index_limit: int, local: bool
) -> list[str]:
    """Fetch one page of quarantined media as MXC URIs, ordered by the time
    each item was quarantined.

    Media that was quarantined before the quarantine timestamp existed may
    carry a timestamp of zero, so the timestamp alone does not give a unique
    ordering on established servers.

    Args:
        index_start: Row offset of the first result to return.
        index_limit: The maximum number of results to return.
        local: When true, only local media is returned. When false, only
            remote media is returned.

    Returns:
        The quarantined media as a list of `mxc://<origin>/<media_id>` URIs.
    """

    def _get_quarantined_media_mxcs_txn(
        txn: LoggingTransaction,
    ) -> list[str]:
        # Sorting on the media ID (and the origin, for remote media) after the
        # timestamp keeps the ordering stable when many rows share the same
        # timestamp. The local query selects a placeholder origin so both
        # queries produce rows of the same shape.
        sql = (
            "SELECT '' as media_origin, media_id FROM local_media_repository WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_id ASC LIMIT ? OFFSET ?"
            if local
            else "SELECT media_origin, media_id FROM remote_media_cache WHERE quarantined_by IS NOT NULL ORDER BY quarantined_ts, media_origin, media_id ASC LIMIT ? OFFSET ?"
        )
        txn.execute(sql, (index_limit, index_start))

        # Local rows carry a placeholder origin, so substitute this server's
        # own hostname for them.
        return [
            f"mxc://{self.hs.hostname if local else row_origin}/{row_media_id}"
            for row_origin, row_media_id in txn
        ]

    return await self.db_pool.runInteraction(
        "get_quarantined_media_mxcs",
        _get_quarantined_media_mxcs_txn,
    )

async def get_media_mxcs_in_room(self, room_id: str) -> tuple[list[str], list[str]]:
"""Retrieves all the local and remote media MXC URIs in a given room

Args:
room_id

Returns:
The local and remote media as a lists of the media IDs.
The local and remote media as lists of the media IDs.
"""

def _get_media_mxcs_in_room_txn(
Expand Down Expand Up @@ -1147,6 +1190,10 @@ def _quarantine_local_media_txn(
The total number of media items quarantined
"""
total_media_quarantined = 0
now_ts: int | None = self.clock.time_msec()

if quarantined_by is None:
now_ts = None

# Effectively a legacy path, update any media that was explicitly named.
if media_ids:
Expand All @@ -1155,13 +1202,13 @@ def _quarantine_local_media_txn(
)
sql = f"""
UPDATE local_media_repository
SET quarantined_by = ?
SET quarantined_by = ?, quarantined_ts = ?
WHERE {sql_many_clause_sql}"""

if quarantined_by is not None:
sql += " AND safe_from_quarantine = FALSE"

txn.execute(sql, [quarantined_by] + sql_many_clause_args)
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
# Note that a rowcount of -1 can be used to indicate no rows were affected.
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0

Expand All @@ -1172,13 +1219,13 @@ def _quarantine_local_media_txn(
)
sql = f"""
UPDATE local_media_repository
SET quarantined_by = ?
SET quarantined_by = ?, quarantined_ts = ?
WHERE {sql_many_clause_sql}"""

if quarantined_by is not None:
sql += " AND safe_from_quarantine = FALSE"

txn.execute(sql, [quarantined_by] + sql_many_clause_args)
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0

return total_media_quarantined
Expand All @@ -1202,6 +1249,10 @@ def _quarantine_remote_media_txn(
The total number of media items quarantined
"""
total_media_quarantined = 0
now_ts: int | None = self.clock.time_msec()

if quarantined_by is None:
now_ts = None

if media:
sql_in_list_clause, sql_args = make_tuple_in_list_sql_clause(
Expand All @@ -1211,10 +1262,10 @@ def _quarantine_remote_media_txn(
)
sql = f"""
UPDATE remote_media_cache
SET quarantined_by = ?
SET quarantined_by = ?, quarantined_ts = ?
WHERE {sql_in_list_clause}"""

txn.execute(sql, [quarantined_by] + sql_args)
txn.execute(sql, [quarantined_by, now_ts] + sql_args)
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0

total_media_quarantined = 0
Expand All @@ -1224,9 +1275,9 @@ def _quarantine_remote_media_txn(
)
sql = f"""
UPDATE remote_media_cache
SET quarantined_by = ?
SET quarantined_by = ?, quarantined_ts = ?
WHERE {sql_many_clause_sql}"""
txn.execute(sql, [quarantined_by] + sql_many_clause_args)
txn.execute(sql, [quarantined_by, now_ts] + sql_many_clause_args)
total_media_quarantined += txn.rowcount if txn.rowcount > 0 else 0

return total_media_quarantined
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
--
-- This file is licensed under the Affero General Public License (AGPL) version 3.
--
-- Copyright (C) 2025 Element Creations, Ltd
--
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU Affero General Public License as
-- published by the Free Software Foundation, either version 3 of the
-- License, or (at your option) any later version.
--
-- See the GNU Affero General Public License for more details:
-- <https://www.gnu.org/licenses/agpl-3.0.html>.

-- Add a timestamp recording when a piece of media was quarantined.
--
-- The column is nullable on purpose: it is NULL for media which is not
-- quarantined. Media that was already quarantined before this migration has no
-- known quarantine time, so it is given a timestamp of 0 below.
ALTER TABLE local_media_repository ADD COLUMN quarantined_ts BIGINT;
ALTER TABLE remote_media_cache ADD COLUMN quarantined_ts BIGINT;

UPDATE local_media_repository SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL;
UPDATE remote_media_cache SET quarantined_ts = 0 WHERE quarantined_by IS NOT NULL;

-- Note: We *probably* should have an index on quarantined_ts, but we're going
-- to try to defer that to a future migration after seeing the performance impact.
Loading
Loading