[PR #11655/1e24afc9 backport][3.13] [PR #11580/d261f8a backport][3.14] Ensure that application/octet-stream is the default content_type (#11660)

patchback[bot] · sgaist · web-flow · commit 7e5a2d7b0c6d · 2025-10-15T14:49:37.000+01:00
**This is a backport of PR #11655 as merged into 3.14 (1e24afc).** (cherry picked from commit d261f8a) Co-authored-by: Samuel Gaist <samuel.gaist@idiap.ch>
diff --git a/CHANGES/10889.bugfix.rst b/CHANGES/10889.bugfix.rst
@@ -0,0 +1,4 @@
+Updated ``Content-Type`` header parsing to return ``application/octet-stream`` when the header contains invalid syntax.
+See :rfc:`9110#section-8.3-5`.
+
+-- by :user:`sgaist`.
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
@@ -311,6 +311,7 @@ Roman Postnov
 Rong Zhang
 Samir Akarioh
 Samuel Colvin
+Samuel Gaist
 Sean Hunt
 Sebastian Acuna
 Sebastian Hanula
diff --git a/aiohttp/helpers.py b/aiohttp/helpers.py
@@ -17,7 +17,9 @@
 import weakref
 from collections import namedtuple
 from contextlib import suppress
+from email.message import EmailMessage
 from email.parser import HeaderParser
+from email.policy import HTTP
 from email.utils import parsedate
 from math import ceil
 from pathlib import Path
@@ -357,14 +359,40 @@ def parse_mimetype(mimetype: str) -> MimeType:
     )
 
 
+class EnsureOctetStream(EmailMessage):
+    """EmailMessage whose default content type is application/octet-stream."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        # https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5
+        self.set_default_type("application/octet-stream")
+
+    def get_content_type(self) -> Any:
+        """Re-implementation of ``Message.get_content_type``.
+
+        Returns application/octet-stream in place of text/plain when the
+        value is wrong. The way this class is used guarantees that content-type
+        is present, which lets the checks be simpler than the base implementation.
+        """
+        value = self.get("content-type", "").lower()
+
+        # Based on the implementation of _splitparam in the standard library
+        ctype, _, _ = value.partition(";")
+        ctype = ctype.strip()
+        if ctype.count("/") != 1:  # not a single type/subtype pair
+            return self.get_default_type()
+        return ctype
+
+
 @functools.lru_cache(maxsize=56)
 def parse_content_type(raw: str) -> Tuple[str, MappingProxyType[str, str]]:
     """Parse Content-Type header.
 
     Returns a tuple of the parsed content type and a
-    MappingProxyType of parameters.
+    MappingProxyType of parameters. The default returned value
+    is `application/octet-stream`
     """
-    msg = HeaderParser().parsestr(f"Content-Type: {raw}")
+    msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}")
     content_type = msg.get_content_type()
     params = msg.get_params(())
     content_dict = dict(params[1:])  # First element is content type again
diff --git a/docs/client_reference.rst b/docs/client_reference.rst
@@ -1566,16 +1566,14 @@ Response object
 
       .. note::
 
-         Returns value is ``'application/octet-stream'`` if no
-         Content-Type header present in HTTP headers according to
-         :rfc:`9110`. If the *Content-Type* header is invalid (e.g., ``jpg``
-         instead of ``image/jpeg``), the value is ``text/plain`` by default
-         according to :rfc:`2045`. To see the original header check
-         ``resp.headers['CONTENT-TYPE']``.
+         Returns ``'application/octet-stream'`` if no Content-Type header
+         is present or the value contains invalid syntax according to
+         :rfc:`9110`. To see the original header check
+         ``resp.headers["Content-Type"]``.
 
          To make sure Content-Type header is not present in
          the server reply, use :attr:`headers` or :attr:`raw_headers`, e.g.
-         ``'CONTENT-TYPE' not in resp.headers``.
+         ``'Content-Type' not in resp.headers``.
 
    .. attribute:: charset
 
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -6,11 +6,12 @@
 import weakref
 from math import ceil, modf
 from pathlib import Path
+from types import MappingProxyType
 from unittest import mock
 from urllib.request import getproxies_environment
 
 import pytest
-from multidict import MultiDict
+from multidict import MultiDict, MultiDictProxy
 from yarl import URL
 
 from aiohttp import helpers
@@ -65,6 +66,30 @@ def test_parse_mimetype(mimetype, expected) -> None:
     assert result == expected
 
 
+# ------------------- parse_content_type ------------------------------
+
+
+@pytest.mark.parametrize(
+    "content_type, expected",
+    [
+        (
+            "text/plain",  # well-formed value is returned as-is
+            ("text/plain", MultiDictProxy(MultiDict())),
+        ),
+        (
+            "wrong",  # no "/" separator, so the default type is returned
+            ("application/octet-stream", MultiDictProxy(MultiDict())),
+        ),
+    ],
+)
+def test_parse_content_type(
+    content_type: str, expected: tuple[str, MappingProxyType[str, str]]
+) -> None:
+    result = helpers.parse_content_type(content_type)
+    # NOTE(review): expected uses MultiDictProxy but is annotated MappingProxyType.
+    assert result == expected
+
+
 # ------------------- guess_filename ----------------------------------
 
 
diff --git a/tests/test_web_response.py b/tests/test_web_response.py
@@ -1164,10 +1164,10 @@ def test_ctor_content_type_with_extra() -> None:
     assert resp.headers["content-type"] == "text/plain; version=0.0.4; charset=utf-8"
 
 
-def test_invalid_content_type_parses_to_text_plain() -> None:
+def test_invalid_content_type_parses_to_application_octect_stream() -> None:
     resp = Response(text="test test", content_type="jpeg")
 
-    assert resp.content_type == "text/plain"
+    assert resp.content_type == "application/octet-stream"
     assert resp.headers["content-type"] == "jpeg; charset=utf-8"