Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19204.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Made the local media directory optional by treating it as a storage provider. This allows off-site media storage without local cache, where media is stored directly to remote providers only, with temporary files used for thumbnail generation when needed. Contributed by Patrice Brend'amour @dr.allgood.
6 changes: 3 additions & 3 deletions scripts-dev/lint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ ruff format --quiet "${files[@]}"
# Using --fix has a tendency to cause subsequent runs of clippy to recompile
# rust code, which can slow down this script. Thus we run clippy without --fix
# first which is quick, and then re-run it with --fix if an error was found.
if ! cargo-clippy --bins --examples --lib --tests -- -D warnings > /dev/null 2>&1; then
cargo-clippy \
if ! cargo clippy --bins --examples --lib --tests -- -D warnings > /dev/null 2>&1; then
cargo clippy \
--bins --examples --lib --tests --allow-staged --allow-dirty --fix -- -D warnings
fi

# Ensure the formatting of Rust code.
cargo-fmt
cargo fmt

# Ensure type hints are correct.
mypy
Expand Down
6 changes: 4 additions & 2 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@
SHA256TransparentIOReader,
SHA256TransparentIOWriter,
)
from synapse.media.storage_provider import StorageProviderWrapper
from synapse.media.storage_provider import (
FileStorageProviderBackend,
StorageProviderWrapper,
)
from synapse.media.thumbnailer import Thumbnailer, ThumbnailError
from synapse.media.url_previewer import UrlPreviewer
from synapse.storage.databases.main.media_repository import LocalMedia, RemoteMedia
Expand Down Expand Up @@ -143,7 +146,6 @@ def __init__(self, hs: "HomeServer"):

# If we have a local media directory, add it as a storage provider
if self.primary_base_path:
from synapse.media.storage_provider import FileStorageProviderBackend, StorageProviderWrapper
backend = FileStorageProviderBackend(hs, self.primary_base_path)
local_wrapper = StorageProviderWrapper(
backend,
Expand Down
59 changes: 37 additions & 22 deletions synapse/media/media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,16 @@
from synapse.api.errors import NotFoundError
from synapse.logging.context import defer_to_thread, run_in_background
from synapse.logging.opentracing import start_active_span, trace, trace_with_opname
from synapse.media._base import ThreadedFileSender
from synapse.media.storage_provider import FileStorageProviderBackend
from synapse.util.clock import Clock
from synapse.util.file_consumer import BackgroundFileConsumer

from ..types import JsonDict
from ._base import FileInfo, Responder
from ._base import FileInfo, Responder, ThreadedFileSender
from .filepath import MediaFilePaths

if TYPE_CHECKING:
from synapse.media.storage_provider import StorageProvider
from synapse.media.storage_provider import StorageProviderWrapper
from synapse.server import HomeServer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -162,7 +162,7 @@ def __init__(
self,
hs: "HomeServer",
filepaths: MediaFilePaths,
storage_providers: Sequence["StorageProvider"],
storage_providers: Sequence["StorageProviderWrapper"],
):
self.hs = hs
self.reactor = hs.get_reactor()
Expand Down Expand Up @@ -228,7 +228,7 @@ async def store_into_file(
path = self._file_info_to_path(file_info)

if self.local_provider:
fname = os.path.join(self.local_media_directory, path)
fname = os.path.join(self.local_media_directory, path) # type: ignore[arg-type]
dirname = os.path.dirname(fname)
os.makedirs(dirname, exist_ok=True)

Expand All @@ -237,11 +237,11 @@ async def store_into_file(
with open(fname, "wb") as f:
yield f, fname

with start_active_span("spam checking and writing to other storage providers"):
spam_check = (
await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
with start_active_span(
"spam checking and writing to other storage providers"
):
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.info("Blocking media due to spam checker")
Expand All @@ -268,14 +268,14 @@ async def store_into_file(
# No local provider, write to temp file
with tempfile.NamedTemporaryFile(delete=False) as f:
fname = f.name
yield f, fname
yield cast(BinaryIO, f), fname

try:
with start_active_span("spam checking and writing to storage providers"):
spam_check = (
await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
with start_active_span(
"spam checking and writing to storage providers"
):
spam_check = await self._spam_checker_module_callbacks.check_media_file_for_spam(
ReadableFileWrapper(self.clock, fname), file_info
)
if spam_check != self._spam_checker_module_callbacks.NOT_SPAM:
logger.info("Blocking media due to spam checker")
Expand All @@ -302,6 +302,18 @@ async def fetch_media(self, file_info: FileInfo) -> Responder | None:
Returns:
Returns a Responder if the file was found, otherwise None.
"""
# URL cache files are stored locally and should not go through storage providers
if file_info.url_cache:
path = self._file_info_to_path(file_info)
if self.local_provider:
local_path = os.path.join(self.local_media_directory, path)
if os.path.isfile(local_path):
# Import here to avoid circular import
from .media_storage import FileResponder

return FileResponder(self.hs, open(local_path, "rb"))
return None

paths = [self._file_info_to_path(file_info)]

# fallback for remote thumbnails with no method in the filename
Expand Down Expand Up @@ -339,7 +351,7 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
"""
path = self._file_info_to_path(file_info)
if self.local_provider:
local_path = os.path.join(self.local_media_directory, path)
local_path = os.path.join(self.local_media_directory, path) # type: ignore[arg-type]
if os.path.exists(local_path):
return local_path

Expand All @@ -353,7 +365,10 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
height=file_info.thumbnail.height,
content_type=file_info.thumbnail.type,
)
legacy_local_path = os.path.join(self.local_media_directory, legacy_path)
legacy_local_path = os.path.join(
self.local_media_directory, # type: ignore[arg-type]
legacy_path,
)
if os.path.exists(legacy_local_path):
return legacy_local_path

Expand All @@ -363,13 +378,13 @@ async def ensure_media_is_in_local_cache(self, file_info: FileInfo) -> str:
for provider in self.storage_providers:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could make use of the fetch_media method here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really. They have a different purpose. The ensure_media_is_in_local_cache is used to ensure a local copy so a thumbnail can be generated. While the fetch_media returns a Responder (to stream the file).

if provider is self.local_provider:
continue
res: Any = await provider.fetch(path, file_info)
if res:
with res:
remote_res: Any = await provider.fetch(path, file_info)
if remote_res:
with remote_res:
consumer = BackgroundFileConsumer(
open(local_path, "wb"), self.reactor
)
await res.write_to_consumer(consumer)
await remote_res.write_to_consumer(consumer)
await consumer.wait()
return local_path

Expand Down
4 changes: 3 additions & 1 deletion synapse/media/storage_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from synapse.util.async_helpers import maybe_awaitable

from ._base import FileInfo, Responder
from .media_storage import FileResponder

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -178,6 +177,9 @@ async def fetch(self, path: str, file_info: FileInfo) -> Responder | None:

backup_fname = os.path.join(self.base_directory, path)
if os.path.isfile(backup_fname):
# Import here to avoid circular import
from .media_storage import FileResponder

return FileResponder(self.hs, open(backup_fname, "rb"))

return None
Expand Down
114 changes: 108 additions & 6 deletions tests/federation/test_federation_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
]

self.filepaths = MediaFilePaths(self.primary_base_path)
self.media_storage = MediaStorage(
hs, self.primary_base_path, self.filepaths, storage_providers
)
self.media_storage = MediaStorage(hs, self.filepaths, storage_providers)
self.media_repo = hs.get_media_repository()

def test_file_download(self) -> None:
Expand Down Expand Up @@ -187,7 +185,7 @@ def test_federation_etag(self) -> None:
self.assertNotIn("body", channel.result)


class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
class FederationMediaTest(unittest.FederatingHomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
super().prepare(reactor, clock, hs)
self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
Expand All @@ -207,9 +205,113 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
]

self.filepaths = MediaFilePaths(self.primary_base_path)
self.media_storage = MediaStorage(
hs, self.primary_base_path, self.filepaths, storage_providers
self.media_storage = MediaStorage(hs, self.filepaths, storage_providers)
self.media_repo = hs.get_media_repository()

def test_thumbnail_download_scaled(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
67,
UserID.from_string("@user_id:whatever.org"),
)
)
# test with an image file
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/thumbnail/{content_uri.media_id}?width=32&height=32&method=scale",
)
self.pump()
self.assertEqual(200, channel.code)

content_type = channel.headers.getRawHeaders("content-type")
assert content_type is not None
assert "multipart/mixed" in content_type[0]
assert "boundary" in content_type[0]

# extract boundary
boundary = content_type[0].split("boundary=")[1]
# split on boundary and check that json field and expected value exist
body = channel.result.get("body")
assert body is not None
stripped_bytes = body.split(b"\r\n" + b"--" + boundary.encode("utf-8"))
found_json = any(
b"\r\nContent-Type: application/json\r\n\r\n{}" in field
for field in stripped_bytes
)
self.assertTrue(found_json)

# check that the png file exists and matches the expected scaled bytes
found_file = any(small_png.expected_scaled in field for field in stripped_bytes)
self.assertTrue(found_file)

def test_thumbnail_download_cropped(self) -> None:
content = io.BytesIO(small_png.data)
content_uri = self.get_success(
self.media_repo.create_or_update_content(
"image/png",
"test_png_thumbnail",
content,
67,
UserID.from_string("@user_id:whatever.org"),
)
)
# test with an image file
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/thumbnail/{content_uri.media_id}?width=32&height=32&method=crop",
)
self.pump()
self.assertEqual(200, channel.code)

content_type = channel.headers.getRawHeaders("content-type")
assert content_type is not None
assert "multipart/mixed" in content_type[0]
assert "boundary" in content_type[0]

# extract boundary
boundary = content_type[0].split("boundary=")[1]
# split on boundary and check that json field and expected value exist
body = channel.result.get("body")
assert body is not None
stripped_bytes = body.split(b"\r\n" + b"--" + boundary.encode("utf-8"))
found_json = any(
b"\r\nContent-Type: application/json\r\n\r\n{}" in field
for field in stripped_bytes
)
self.assertTrue(found_json)

# check that the png file exists and matches the expected cropped bytes
found_file = any(
small_png.expected_cropped in field for field in stripped_bytes
)
self.assertTrue(found_file)


class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
super().prepare(reactor, clock, hs)
self.test_dir = tempfile.mkdtemp(prefix="synapse-tests-")
self.addCleanup(shutil.rmtree, self.test_dir)
self.primary_base_path = os.path.join(self.test_dir, "primary")
self.secondary_base_path = os.path.join(self.test_dir, "secondary")

hs.config.media.media_store_path = self.primary_base_path

storage_providers = [
StorageProviderWrapper(
FileStorageProviderBackend(hs, self.secondary_base_path),
store_local=True,
store_remote=False,
store_synchronous=True,
)
]

self.filepaths = MediaFilePaths(self.primary_base_path)
self.media_storage = MediaStorage(hs, self.filepaths, storage_providers)
self.media_repo = hs.get_media_repository()

def test_thumbnail_download_scaled(self) -> None:
Expand Down
23 changes: 17 additions & 6 deletions tests/media/test_media_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@
from synapse.media._base import FileInfo, ThumbnailInfo
from synapse.media.filepath import MediaFilePaths
from synapse.media.media_storage import MediaStorage, ReadableFileWrapper
from synapse.media.storage_provider import FileStorageProviderBackend
from synapse.media.storage_provider import (
FileStorageProviderBackend,
StorageProviderWrapper,
)
from synapse.media.thumbnailer import ThumbnailProvider
from synapse.module_api import ModuleApi
from synapse.module_api.callbacks.spamchecker_callbacks import load_legacy_spam_checkers
Expand Down Expand Up @@ -78,14 +81,22 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
hs.config.media.media_store_path = self.primary_base_path

storage_providers = [
FileStorageProviderBackend(hs, self.primary_base_path),
FileStorageProviderBackend(hs, self.secondary_base_path),
StorageProviderWrapper(
FileStorageProviderBackend(hs, self.primary_base_path),
store_local=True,
store_remote=False,
store_synchronous=True,
),
StorageProviderWrapper(
FileStorageProviderBackend(hs, self.secondary_base_path),
store_local=True,
store_remote=False,
store_synchronous=True,
),
]

self.filepaths = MediaFilePaths(self.primary_base_path)
self.media_storage = MediaStorage(
hs, self.filepaths, storage_providers
)
self.media_storage = MediaStorage(hs, self.filepaths, storage_providers)

def test_ensure_media_is_in_local_cache(self) -> None:
media_id = "some_media_id"
Expand Down