Skip to content
Open
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
40d60c8
Add HTTP 413 response when incoming request is too large
devonh Nov 21, 2025
1e8eaa1
Add changelog entry
devonh Nov 21, 2025
27b5918
Merge branch 'develop' into devon/too-large-http-response
devonh Nov 21, 2025
c3be074
Log redacted uri
devonh Nov 21, 2025
26a2c8f
Log redacted uri in content-type check
devonh Nov 21, 2025
5cf7b96
Address review comments
devonh Nov 26, 2025
85d84f5
Upade comment wording
devonh Nov 26, 2025
9c4536b
Rejig max_request_size usage to use common constants everywhere
devonh Nov 26, 2025
5f4a097
Add ref to upstream twisted changes needed
devonh Nov 26, 2025
21b114c
Don't add duplicate Content-Length header in tests
devonh Nov 26, 2025
426b676
Remove unused variable
devonh Nov 27, 2025
296ed42
Update synapse/http/site.py
devonh Dec 2, 2025
7e86d3d
Log rejection at info level
devonh Dec 1, 2025
e8d8a3b
Temp changes for incorrect Content-Length
devonh Dec 2, 2025
36973e0
Merge branch 'develop' into devon/too-large-http-response
devonh Dec 4, 2025
3f9af8d
Remove invalid comment
devonh Dec 4, 2025
536bf2d
Address review comments
devonh Dec 4, 2025
8c594c9
Add more comments
devonh Dec 4, 2025
9e2b2a1
Add FIXME comment
devonh Dec 4, 2025
e4f2194
Refactor content-length extraction
devonh Dec 4, 2025
c647a0a
Fix exception handling
devonh Dec 4, 2025
d1b0429
Add test for too small Content-Length header
devonh Dec 4, 2025
7648b3b
Fix linter errors
devonh Dec 4, 2025
72c3ab3
Add comment about why we reject multiple content-length headers
devonh Dec 4, 2025
c2b8d2c
Fix mypy errors
devonh Dec 4, 2025
5ab4736
Fix test content-length header addition logic
devonh Dec 4, 2025
7b0b9b2
Refactor error response writing to be reusable
devonh Dec 4, 2025
b2c219e
Add return type
devonh Dec 4, 2025
2a2cd4b
Move log
devonh Dec 4, 2025
485283c
Update log
devonh Dec 4, 2025
b095437
Add changelog entry
devonh Dec 4, 2025
b5b4643
Refactor extracted function to be class member
devonh Dec 5, 2025
e7fe25f
Update synapse/http/site.py
devonh Dec 5, 2025
bea38d4
Update tests/test_server.py
devonh Dec 5, 2025
e07fd8b
Better error messages
devonh Dec 5, 2025
93f58e6
Add validation for content_length too large
devonh Dec 5, 2025
070356d
Merge branch 'develop' into devon/too-large-http-response
devonh Dec 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/19212.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add HTTP 413 response when incoming request is too large.
13 changes: 13 additions & 0 deletions synapse/api/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,19 @@
# the max size of a (canonical-json-encoded) event
MAX_PDU_SIZE = 65536

# The maximum allowed size of an HTTP request.
# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
MAX_REQUEST_SIZE = 200 * MAX_PDU_SIZE

# Max/min size of ints in canonical JSON
CANONICALJSON_MAX_INT = (2**53) - 1
CANONICALJSON_MIN_INT = -CANONICALJSON_MAX_INT
Expand Down
15 changes: 3 additions & 12 deletions synapse/app/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
from twisted.web.resource import Resource

import synapse.util.caches
from synapse.api.constants import MAX_PDU_SIZE
from synapse.api.constants import MAX_REQUEST_SIZE
from synapse.app import check_bind_error
from synapse.config import ConfigError
from synapse.config._base import format_config_error
Expand Down Expand Up @@ -895,17 +895,8 @@ def sdnotify(state: bytes) -> None:
def max_request_body_size(config: HomeServerConfig) -> int:
"""Get a suitable maximum size for incoming HTTP requests"""

# Other than media uploads, the biggest request we expect to see is a fully-loaded
# /federation/v1/send request.
#
# The main thing in such a request is up to 50 PDUs, and up to 100 EDUs. PDUs are
# limited to 65536 bytes (possibly slightly more if the sender didn't use canonical
# json encoding); there is no specced limit to EDUs (see
# https://github.com/matrix-org/matrix-doc/issues/3121).
#
# in short, we somewhat arbitrarily limit requests to 200 * 64K (about 12.5M)
#
max_request_size = 200 * MAX_PDU_SIZE
# Baseline default for any request that isn't configured in the homeserver config
max_request_size = MAX_REQUEST_SIZE

# if we have a media repo enabled, we may need to allow larger uploads than that
if config.media.can_load_media_repo:
Expand Down
139 changes: 119 additions & 20 deletions synapse/http/site.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#
#
import contextlib
import json
import logging
import time
from http import HTTPStatus
Expand All @@ -33,9 +34,11 @@
from twisted.internet.protocol import Protocol
from twisted.python.failure import Failure
from twisted.web.http import HTTPChannel
from twisted.web.http_headers import Headers
from twisted.web.resource import IResource, Resource
from twisted.web.server import Request

from synapse.api.errors import Codes
from synapse.config.server import ListenerConfig
from synapse.http import get_request_user_agent, redact_uri
from synapse.http.proxy import ProxySite
Expand All @@ -46,7 +49,7 @@
PreserveLoggingContext,
)
from synapse.metrics import SERVER_NAME_LABEL
from synapse.types import ISynapseReactor, Requester
from synapse.types import ISynapseReactor, JsonDict, Requester

if TYPE_CHECKING:
import opentracing
Expand Down Expand Up @@ -146,34 +149,130 @@

# Twisted machinery: this method is called by the Channel once the full request has
# been received, to dispatch the request to a resource.
#
# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once we Twisted releases a fix and we update to a
# version that is patched
def requestReceived(self, command: bytes, path: bytes, version: bytes) -> None:
# In the case of a Content-Length header being present, and it's value being too
# large, throw a proper error to make debugging issues due to overly large requests much
# easier. Currently we handle such cases in `handleContentChunk` and abort the
# connection without providing a proper HTTP response.
#
# Attempting to write an HTTP response from within `handleContentChunk` does not
# work, so the code here has been added to at least provide a response in the
# case of the Content-Length header being present.
self.method, self.uri = command, path
self.clientproto = version

def get_content_length_from_headers(headers: Headers) -> int | None:
content_length_headers = headers.getRawHeaders(b"Content-Length")
if content_length_headers is not None:
# If there are multiple `Content-Length` headers return an error.
# We don't want to even try to pick the right one if there are multiple
# as we could run into problems similar to request smuggling vulnerabilities
# which rely on the mismatch of how different systems interpret information.
if len(content_length_headers) != 1:
logger.warning(
"Dropping request from %s because it contains multiple Content-Length headers: %s %s",
self.client,
self.get_method(),
self.get_redacted_uri(),
)
raise Exception("Too many Content-Length headers.")

try:
content_length = int(content_length_headers[0])
return content_length
except Exception:
raise Exception("Content-Length value invalid.")
return None

def respond_with_error(error_code: HTTPStatus, error_json: JsonDict):
self.code = error_code.value
self.code_message = bytes(error_code.phrase, "ascii")
error_response_bytes = (json.dumps(error_json)).encode()

self.responseHeaders.setRawHeaders(b"Content-Type", [b"application/json"])
self.responseHeaders.setRawHeaders(
b"Content-Length", [f"{len(error_response_bytes)}"]
)
self.write(error_response_bytes)

try:
content_length = get_content_length_from_headers(self.requestHeaders)
except Exception as e:
error_response_json = {
"errcode": Codes.UNKNOWN,
Copy link
Contributor

@MadLittleMods MadLittleMods Dec 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optional: We could have our own errcode for the known scenarios.

IO.ELEMENT.INVALID_CONTENT_LENGTH, etc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using M_UNKNOWN here is more in the spirit of the spec https://spec.matrix.org/v1.16/client-server-api/#standard-error-response
We are trying to provide an HTTP error here since this is an HTTP issue, not a matrix endpoint issue. Assigning it a custom matrix errcode carries with it the implication that this is a matrix error.

From the spec:
When encountering the error code M_UNKNOWN, clients should prefer the HTTP status code as a more reliable reference for what the issue was.
if the client were to receive an error code of M_UNKNOWN with a 400 Bad Request, the client should assume that the request being made was invalid

Copy link
Contributor

@MadLittleMods MadLittleMods Dec 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that works. But I also think we could do better ⏩

By the same logic, M_TOO_LARGE would be inappropriate here then. Bottom-line is that we just need better error codes in the spec and that could start with our own namespaced versions which can be parsed by downstream code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

M_TOO_LARGE happens due to an internal Synapse limit based on the size of transactions from the matrix spec (or config setting). So it's easy to argue that it's a matrix based error here, not a generic HTTP error.

"error": f"{str(e)}",
}
respond_with_error(HTTPStatus.BAD_REQUEST, error_response_json)
self.loseConnection()
return

if content_length is not None and self.content is not None:
if content_length < self.content.tell():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the opposite case: content_length > self.content.tell()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn't clear to me from the spec that this is always an error.
But we can treat it that way and be spec compliant. So I have added it for completeness.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also refactored things now that we have so many comparisons to reduce duplication.

logger.info(
"Rejecting request from %s because Content-Length %d is smaller than the request content size %d: %s %s",
self.client,
content_length,
self.content.tell(),
self.get_method(),
self.get_redacted_uri(),
)

error_response_json = {
"errcode": Codes.UNKNOWN,
"error": "Request content is too small",
}
respond_with_error(HTTPStatus.BAD_REQUEST, error_response_json)
self.loseConnection()
return

if content_length > self._max_request_body_size:
logger.info(
"Rejecting request from %s because Content-Length %d exceeds maximum size %d: %s %s",
self.client,
content_length,
self._max_request_body_size,
self.get_method(),
self.get_redacted_uri(),
)

error_response_json = {
"errcode": Codes.TOO_LARGE,
"error": "Request content is too large",
}
respond_with_error(
HTTPStatus.REQUEST_ENTITY_TOO_LARGE, error_response_json
)
self.loseConnection()
return

# We're patching Twisted to bail/abort early when we see someone trying to upload
# `multipart/form-data` so we can avoid Twisted parsing the entire request body into
# in-memory (specific problem of this specific `Content-Type`). This protects us
# from an attacker uploading something bigger than the available RAM and crashing
# the server with a `MemoryError`, or carefully block just enough resources to cause
# all other requests to fail.
#
# FIXME: This can be removed once Twisted releases a fix and we update to a
# version that is patched
# See: https://github.com/element-hq/synapse/security/advisories/GHSA-rfq8-j7rh-8hf2
if command == b"POST":
ctype = self.requestHeaders.getRawHeaders(b"content-type")
if ctype and b"multipart/form-data" in ctype[0]:
self.method, self.uri = command, path
self.clientproto = version
logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
self.get_method(),
self.get_redacted_uri(),
)

self.code = HTTPStatus.UNSUPPORTED_MEDIA_TYPE.value
self.code_message = bytes(
HTTPStatus.UNSUPPORTED_MEDIA_TYPE.phrase, "ascii"
)
self.responseHeaders.setRawHeaders(b"content-length", [b"0"])

logger.warning(
"Aborting connection from %s because `content-type: multipart/form-data` is unsupported: %s %s",
self.client,
command,
path,
)
# FIXME: Return a better error response here similar to the
# `error_response_json` returned in other code paths here.
self.responseHeaders.setRawHeaders(b"Content-Length", [b"0"])
self.write(b"")
self.loseConnection()
return
Expand Down
102 changes: 102 additions & 0 deletions tests/http/test_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from twisted.internet.address import IPv6Address
from twisted.internet.testing import MemoryReactor, StringTransport

from synapse.app._base import max_request_body_size
from synapse.app.homeserver import SynapseHomeServer
from synapse.server import HomeServer
from synapse.util.clock import Clock
Expand Down Expand Up @@ -143,3 +144,104 @@ def test_content_type_multipart(self) -> None:

# we should get a 415
self.assertRegex(transport.value().decode(), r"^HTTP/1\.1 415 ")

def test_content_length_too_large(self) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should also have a test for oversized_length content but small false Content-Length header

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what the best way to write a test for this is.
I did a bunch of testing and Synapse (Twisted) handles this by truncating the HTTP request at the specified size of the Content-Length. So the request will go through to the server correctly but the body will be silently truncated. The rest of the bytes are dropped.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(test hasn't been added yet)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a bunch of testing and Synapse (Twisted) handles this by truncating the HTTP request at the specified size of the Content-Length. So the request will go through to the server correctly but the body will be silently truncated. The rest of the bytes are dropped.

Perhaps some request with a basic JSON body that will be cut-off and we expect M_NOT_JSON 🤷

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After more testing, it seems that Synapse doesn't care if a Content-Length is present and the value is smaller than the actual size of the content. It just deals with it.
The HTTP spec isn't exactly clear what is the correct thing to do in this case. And googling didn't give great results.
This section of the spec (see point 4) could be taken to mean we should be responding with 400 Bad Request in this case.

I can make that change here (to add the assertion).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright - there is a test for this now. I added it in tests/test_server.py since I was able to get it working properly in there.

"""HTTP requests with Content-Length exceeding max size should be rejected with 413"""
self.hs.start_listening()

# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)

# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)

# Send a request with Content-Length header that exceeds the limit.
# Default max is 50MB (from media max_upload_size), so send something larger.
oversized_length = 1 + max_request_body_size(self.hs.config)
protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(oversized_length).encode() + b"\r\n"
b"\r\n"
b"" + b"x" * oversized_length + b"\r\n"
b"\r\n"
)

# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)

# We should get a 413 Content Too Large
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 413 ")
self.assertSubstring("M_TOO_LARGE", response)

def test_too_many_content_length_headers(self) -> None:
"""HTTP requests with multiple Content-Length headers should be rejected with 400"""
self.hs.start_listening()

# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)

# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)

protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"Content-Length: " + str(5).encode() + b"\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)

# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)

# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")

def test_invalid_content_length_headers(self) -> None:
"""HTTP requests with invalid Content-Length header should be rejected with 400"""
self.hs.start_listening()

# find the HTTP server which is configured to listen on port 0
(port, factory, _backlog, interface) = self.reactor.tcpServers[0]
self.assertEqual(interface, "::")
self.assertEqual(port, 0)

# complete the connection and wire it up to a fake transport
client_address = IPv6Address("TCP", "::1", 2345)
protocol = factory.buildProtocol(client_address)
transport = StringTransport()
protocol.makeConnection(transport)

protocol.dataReceived(
b"POST / HTTP/1.1\r\n"
b"Connection: close\r\n"
b"Content-Length: eight\r\n"
b"\r\n"
b"" + b"xxxxx" + b"\r\n"
b"\r\n"
)

# Advance the reactor to process the request
while not transport.disconnecting:
self.reactor.advance(1)

# We should get a 400
response = transport.value().decode()
self.assertRegex(response, r"^HTTP/1\.1 400 ")
6 changes: 0 additions & 6 deletions tests/rest/client/test_login.py
Original file line number Diff line number Diff line change
Expand Up @@ -1728,9 +1728,6 @@ def test_username_picker_use_displayname_avatar_and_email(self) -> None:
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
],
)
self.assertEqual(chan.code, 302, chan.result)
Expand Down Expand Up @@ -1818,9 +1815,6 @@ def test_username_picker_dont_use_displayname_avatar_or_email(self) -> None:
content_is_form=True,
custom_headers=[
("Cookie", "username_mapping_session=" + session_id),
# old versions of twisted don't do form-parsing without a valid
# content-length header.
("Content-Length", str(len(content))),
],
)
self.assertEqual(chan.code, 302, chan.result)
Expand Down
4 changes: 0 additions & 4 deletions tests/rest/client/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -2590,7 +2590,6 @@ def test_authenticated_media(self) -> None:
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
Expand Down Expand Up @@ -2750,7 +2749,6 @@ def test_authenticated_media_etag(self) -> None:
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
Expand Down Expand Up @@ -2909,7 +2907,6 @@ def upload_media(self, size: int) -> FakeChannel:
access_token=self.tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)

def test_upload_under_limit(self) -> None:
Expand Down Expand Up @@ -3074,7 +3071,6 @@ def upload_media(self, size: int, tok: str) -> FakeChannel:
access_token=tok,
shorthand=False,
content_type=b"text/plain",
custom_headers=[("Content-Length", str(size))],
)

def test_upload_under_limit(self) -> None:
Expand Down
Loading
Loading