Skip to content

Commit 7e5a2d7

Browse files
[PR #11655/1e24afc9 backport][3.13] [PR #11580/d261f8a backport][3.14] Ensure that application/octet-stream is the default content_type (#11660)
**This is a backport of PR #11655 as merged into 3.14 (1e24afc).** (cherry picked from commit d261f8a) Co-authored-by: Samuel Gaist <[email protected]>
1 parent 9cb35e2 commit 7e5a2d7

File tree

6 files changed

+68
-12
lines changed

6 files changed

+68
-12
lines changed

CHANGES/10889.bugfix.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Updated ``Content-Type`` header parsing to return ``application/octet-stream`` when the header contains invalid syntax.
2+
See :rfc:`9110#section-8.3-5`.
3+
4+
-- by :user:`sgaist`.

CONTRIBUTORS.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ Roman Postnov
311311
Rong Zhang
312312
Samir Akarioh
313313
Samuel Colvin
314+
Samuel Gaist
314315
Sean Hunt
315316
Sebastian Acuna
316317
Sebastian Hanula

aiohttp/helpers.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
import weakref
1818
from collections import namedtuple
1919
from contextlib import suppress
20+
from email.message import EmailMessage
2021
from email.parser import HeaderParser
22+
from email.policy import HTTP
2123
from email.utils import parsedate
2224
from math import ceil
2325
from pathlib import Path
@@ -357,14 +359,40 @@ def parse_mimetype(mimetype: str) -> MimeType:
357359
)
358360

359361

362+
class EnsureOctetStream(EmailMessage):
363+
def __init__(self) -> None:
364+
super().__init__()
365+
# https://www.rfc-editor.org/rfc/rfc9110#section-8.3-5
366+
self.set_default_type("application/octet-stream")
367+
368+
def get_content_type(self) -> Any:
369+
"""Re-implementation of get_content_type from Message.
370+
371+
Returns application/octet-stream in place of text/plain when
372+
the header value is invalid.
373+
374+
The way this class is used guarantees that content-type will
375+
be present, so the checks are simpler than in the base implementation.
376+
"""
377+
value = self.get("content-type", "").lower()
378+
379+
# Based on the implementation of _splitparam in the standard library
380+
ctype, _, _ = value.partition(";")
381+
ctype = ctype.strip()
382+
if ctype.count("/") != 1:  # anything without exactly one "/" falls back to the default type
383+
return self.get_default_type()
384+
return ctype
385+
386+
360387
@functools.lru_cache(maxsize=56)
361388
def parse_content_type(raw: str) -> Tuple[str, MappingProxyType[str, str]]:
362389
"""Parse Content-Type header.
363390
364391
Returns a tuple of the parsed content type and a
365-
MappingProxyType of parameters.
392+
MappingProxyType of parameters. The default returned value
393+
is `application/octet-stream`
366394
"""
367-
msg = HeaderParser().parsestr(f"Content-Type: {raw}")
395+
msg = HeaderParser(EnsureOctetStream, policy=HTTP).parsestr(f"Content-Type: {raw}")
368396
content_type = msg.get_content_type()
369397
params = msg.get_params(())
370398
content_dict = dict(params[1:]) # First element is content type again

docs/client_reference.rst

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,16 +1566,14 @@ Response object
15661566

15671567
.. note::
15681568

1569-
Returns value is ``'application/octet-stream'`` if no
1570-
Content-Type header present in HTTP headers according to
1571-
:rfc:`9110`. If the *Content-Type* header is invalid (e.g., ``jpg``
1572-
instead of ``image/jpeg``), the value is ``text/plain`` by default
1573-
according to :rfc:`2045`. To see the original header check
1574-
``resp.headers['CONTENT-TYPE']``.
1569+
Returns ``'application/octet-stream'`` if no Content-Type header
1570+
is present or the value contains invalid syntax according to
1571+
:rfc:`9110`. To see the original header check
1572+
``resp.headers["Content-Type"]``.
15751573

15761574
To make sure the Content-Type header is not present in
15771575
the server reply, use :attr:`headers` or :attr:`raw_headers`, e.g.
1578-
``'CONTENT-TYPE' not in resp.headers``.
1576+
``'Content-Type' not in resp.headers``.
15791577

15801578
.. attribute:: charset
15811579

tests/test_helpers.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@
66
import weakref
77
from math import ceil, modf
88
from pathlib import Path
9+
from types import MappingProxyType
910
from unittest import mock
1011
from urllib.request import getproxies_environment
1112

1213
import pytest
13-
from multidict import MultiDict
14+
from multidict import MultiDict, MultiDictProxy
1415
from yarl import URL
1516

1617
from aiohttp import helpers
@@ -65,6 +66,30 @@ def test_parse_mimetype(mimetype, expected) -> None:
6566
assert result == expected
6667

6768

69+
# ------------------- parse_content_type ------------------------------
70+
71+
72+
@pytest.mark.parametrize(
73+
"content_type, expected",
74+
[
75+
(
76+
"text/plain",  # well-formed value: expected back unchanged, no parameters
77+
("text/plain", MultiDictProxy(MultiDict())),
78+
),
79+
(
80+
"wrong",  # no "/" in the value: expected to fall back to the default type
81+
("application/octet-stream", MultiDictProxy(MultiDict())),
82+
),
83+
],
84+
)
85+
def test_parse_content_type(
86+
content_type: str, expected: tuple[str, MappingProxyType[str, str]]
87+
) -> None:
88+
result = helpers.parse_content_type(content_type)
89+
90+
assert result == expected  # NOTE(review): expected holds MultiDictProxy, annotation says MappingProxyType; presumably relies on mapping equality — confirm
91+
92+
6893
# ------------------- guess_filename ----------------------------------
6994

7095

tests/test_web_response.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,10 +1164,10 @@ def test_ctor_content_type_with_extra() -> None:
11641164
assert resp.headers["content-type"] == "text/plain; version=0.0.4; charset=utf-8"
11651165

11661166

1167-
def test_invalid_content_type_parses_to_text_plain() -> None:
1167+
def test_invalid_content_type_parses_to_application_octect_stream() -> None:
11681168
resp = Response(text="test test", content_type="jpeg")
11691169

1170-
assert resp.content_type == "text/plain"
1170+
assert resp.content_type == "application/octet-stream"
11711171
assert resp.headers["content-type"] == "jpeg; charset=utf-8"
11721172

11731173

0 commit comments

Comments
 (0)