Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 8ec2217

Browse files
authored
Reduce the memory usage of previewing media files. (#9421)
This reduces the memory used when previewing media files that turn out to be larger than the `max_spider_size`, by avoiding buffering the content internally in treq. It also checks the `Content-Length` header in additional places, rather than streaming the content to determine the body length.
1 parent bb2577f commit 8ec2217

File tree

3 files changed

+18
-18
lines changed

3 files changed

+18
-18
lines changed

changelog.d/9421.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reduce the amount of memory used when generating the URL preview of a file that is larger than the `max_spider_size`.

synapse/http/client.py

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
)
5757
from twisted.web.http import PotentialDataLoss
5858
from twisted.web.http_headers import Headers
59-
from twisted.web.iweb import IAgent, IBodyProducer, IResponse
59+
from twisted.web.iweb import UNKNOWN_LENGTH, IAgent, IBodyProducer, IResponse
6060

6161
from synapse.api.errors import Codes, HttpResponseException, SynapseError
6262
from synapse.http import QuieterFileBodyProducer, RequestTimedOutError, redact_uri
@@ -408,6 +408,9 @@ async def request(
408408
agent=self.agent,
409409
data=body_producer,
410410
headers=headers,
411+
# Avoid buffering the body in treq since we do not reuse
412+
# response bodies.
413+
unbuffered=True,
411414
**self._extra_treq_args,
412415
) # type: defer.Deferred
413416

@@ -702,18 +705,6 @@ async def get_file(
702705

703706
resp_headers = dict(response.headers.getAllRawHeaders())
704707

705-
if (
706-
b"Content-Length" in resp_headers
707-
and max_size
708-
and int(resp_headers[b"Content-Length"][0]) > max_size
709-
):
710-
logger.warning("Requested URL is too large > %r bytes" % (max_size,))
711-
raise SynapseError(
712-
502,
713-
"Requested file is too large > %r bytes" % (max_size,),
714-
Codes.TOO_LARGE,
715-
)
716-
717708
if response.code > 299:
718709
logger.warning("Got %d when downloading %s" % (response.code, url))
719710
raise SynapseError(502, "Got error %d" % (response.code,), Codes.UNKNOWN)
@@ -780,7 +771,9 @@ def dataReceived(self, data: bytes) -> None:
780771
# in the meantime.
781772
if self.max_size is not None and self.length >= self.max_size:
782773
self.deferred.errback(BodyExceededMaxSize())
783-
self.transport.loseConnection()
774+
# Close the connection (forcefully) since all the data will get
775+
# discarded anyway.
776+
self.transport.abortConnection()
784777

785778
def connectionLost(self, reason: Failure) -> None:
786779
# If the maximum size was already exceeded, there's nothing to do.
@@ -814,6 +807,11 @@ def read_body_with_max_size(
814807
Returns:
815808
A Deferred which resolves to the length of the read body.
816809
"""
810+
# If the Content-Length header gives a size larger than the maximum allowed
811+
# size, do not bother downloading the body.
812+
if max_size is not None and response.length != UNKNOWN_LENGTH:
813+
if response.length > max_size:
814+
return defer.fail(BodyExceededMaxSize())
817815

818816
d = defer.Deferred()
819817
response.deliverBody(_ReadBodyWithMaxSizeProtocol(stream, d, max_size))

tests/http/test_client.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
from twisted.python.failure import Failure
2020
from twisted.web.client import ResponseDone
21+
from twisted.web.iweb import UNKNOWN_LENGTH
2122

2223
from synapse.http.client import BodyExceededMaxSize, read_body_with_max_size
2324

@@ -27,12 +28,12 @@
2728
class ReadBodyWithMaxSizeTests(TestCase):
2829
def setUp(self):
2930
"""Start reading the body, returns the response, result and proto"""
30-
self.response = Mock()
31+
response = Mock(length=UNKNOWN_LENGTH)
3132
self.result = BytesIO()
32-
self.deferred = read_body_with_max_size(self.response, self.result, 6)
33+
self.deferred = read_body_with_max_size(response, self.result, 6)
3334

3435
# Fish the protocol out of the response.
35-
self.protocol = self.response.deliverBody.call_args[0][0]
36+
self.protocol = response.deliverBody.call_args[0][0]
3637
self.protocol.transport = Mock()
3738

3839
def _cleanup_error(self):
@@ -88,7 +89,7 @@ def test_additional_data(self):
8889
self.protocol.dataReceived(b"1234567890")
8990
self.assertIsInstance(self.deferred.result, Failure)
9091
self.assertIsInstance(self.deferred.result.value, BodyExceededMaxSize)
91-
self.protocol.transport.loseConnection.assert_called_once()
92+
self.protocol.transport.abortConnection.assert_called_once()
9293

9394
# More data might have come in.
9495
self.protocol.dataReceived(b"1234567890")

0 commit comments

Comments
 (0)