Skip to content

Commit 741cb61

Browse files
authored
[PR #11112/8edec63 backport][3.12] Fix cookie parsing issues (#11117)
1 parent a57ff76 commit 741cb61

16 files changed

+1769
-65
lines changed

CHANGES/11112.bugfix.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
Fixed cookie parsing to be more lenient when handling cookies with special characters
2+
in names or values. Cookies with characters like ``{``, ``}``, and ``/`` in names are now
3+
accepted instead of causing a :exc:`~http.cookies.CookieError` and 500 errors. Additionally,
4+
cookies with mismatched quotes in values are now parsed correctly, and quoted cookie
5+
values are now handled consistently whether or not they include special attributes
6+
like ``Domain``. Also fixed :class:`~aiohttp.CookieJar` to ensure shared cookies (domain="", path="")
7+
respect the ``quote_cookie`` parameter, making cookie quoting behavior consistent for
8+
all cookies -- by :user:`bdraco`.

CHANGES/2683.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
11112.bugfix.rst

CHANGES/5397.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
11112.bugfix.rst

CHANGES/7993.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
11112.bugfix.rst

aiohttp/_cookie_helpers.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""
2+
Internal cookie handling helpers.
3+
4+
This module contains internal utilities for cookie parsing and manipulation.
5+
These are not part of the public API and may change without notice.
6+
"""
7+
8+
import re
9+
import sys
10+
from http.cookies import Morsel
11+
from typing import List, Optional, Sequence, Tuple, cast
12+
13+
from .log import internal_logger
14+
15+
__all__ = ("parse_cookie_headers", "preserve_morsel_with_coded_value")
16+
17+
# Cookie parsing constants
#
# Cookie names are validated leniently: RFC 6265 defines cookie-name as an
# RFC 2616 Section 2.2 token, but servers in the wild emit names containing
# characters such as {} [] () and / (see #2683). The expression below accepts
# those while still rejecting obviously malformed names.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Attribute names recognised on a cookie (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    }
)

# Attributes that are flags and therefore need no value (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"secure", "httponly", "partitioned"})

# Tokenizer for a single ``key[=value]`` item of a cookie header.
# Derived from the pattern used by http.cookies.SimpleCookie, relaxed so that
# more characters are allowed in cookie names (#2683), unmatched opening quotes
# are tolerated (#7993), and additional "expires" date formats are accepted
# (#4493, #4327).
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+?   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any double-quoted string (properly closed)
    |                                  # or
    "[^";]*                            # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    |                                  # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s              # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s        # Date and time in specific format
    (GMT|[+-]\d{4})                     # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                        # NOTE: RFC 2822 timezone support is an aiohttp extension
                                        # for issue #4493 - SimpleCookie does NOT support this
    |                                  # or
    # ANSI C asctime() format: "Wed Jun  9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    |                                  # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
79+
80+
81+
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Return a Morsel carrying the key, value and coded_value of ``cookie`` unchanged.

    The coded value is copied verbatim instead of being re-encoded, so the
    cookie is sent back in exactly the form the server produced it. Some old
    servers are strict about cookie formats and reject re-encoded cookies;
    see https://github.com/aio-libs/aiohttp/pull/1453 for the original report
    of SimpleCookie re-encoding breaking authentication.

    Args:
        cookie: The Morsel (typically taken from a SimpleCookie) to copy from.

    Returns:
        The Morsel stored under ``cookie.key`` inside ``cookie`` if such an
        entry exists, otherwise a newly created Morsel; in both cases its
        key, value and coded_value are set from ``cookie``.

    """
    # State is injected through __setstate__ rather than Morsel.set() so that
    # the already validated key/value pair is not validated a second time.
    # Being part of the pickling protocol, __setstate__ is also less likely to
    # change than Morsel's protected attributes.
    preserved_state = {
        "key": cookie.key,
        "value": cookie.value,
        "coded_value": cookie.coded_value,
    }
    target = cast("Morsel[str]", cookie.get(cookie.key, Morsel()))
    target.__setstate__(preserved_state)  # type: ignore[attr-defined]
    return target
109+
110+
111+
# One backslash escape inside a quoted cookie value: either a three-digit
# octal character code (\000 - \377) or a backslash followed by any single
# character. This is the same expression http.cookies uses for unquoting.
_UNQUOTE_ESCAPE_RE = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")


def _unquote_escape(match: "re.Match[str]") -> str:
    """Return the character that a single matched backslash escape stands for."""
    octal_digits = match.group(1)
    if octal_digits is not None:
        return chr(int(octal_digits, 8))
    return match.group(2)


def _unquote(text: str) -> str:
    """
    Unquote a cookie value.

    Mirrors the escape handling of http.cookies._unquote: surrounding double
    quotes are stripped, ``\\NNN`` octal escapes are decoded to the character
    they encode (for example ``\\073`` becomes ``;``), and any other
    backslash-escaped character is replaced by the character itself
    (``\\"`` becomes ``"`` and ``\\\\`` becomes ``\\``).

    Args:
        text: The raw cookie value, possibly wrapped in double quotes.

    Returns:
        The unquoted value. ``text`` is returned unchanged when it is shorter
        than two characters or is not enclosed in a pair of double quotes.

    """
    # Only a value wrapped in a matching pair of double quotes is quoted.
    if len(text) < 2 or text[0] != '"' or text[-1] != '"':
        return text
    # Decode all escapes in one left-to-right pass so that the output of one
    # replacement can never be misread as the start of another escape.
    return _UNQUOTE_ESCAPE_RE.sub(_unquote_escape, text[1:-1])
125+
126+
127+
def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers into ``(name, Morsel)`` pairs.

    The algorithm is a vendored variant of SimpleCookie.__parse_string, so
    cookies are split the same way SimpleCookie would split them, including
    malformed headers with missing semicolons.

    The same lenient parser serves both Cookie and Set-Cookie headers.
    Following RFC 6265 Section 5.2 (Cookie) and Section 4.2.1 (Set-Cookie)
    to the letter would be preferable, but real-world data requires a more
    forgiving approach.

    NOTE: Unlike SimpleCookie, a value with an unmatched opening quote
    (e.g. 'cookie="value') does not stop parsing, so the cookies that follow
    it are not silently dropped.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Raw header values; empty entries are skipped.

    Returns:
        One ``(name, morsel)`` tuple per accepted cookie, in the order the
        cookies were encountered. Each header is parsed independently.

    """
    results: List[Tuple[str, Morsel[str]]] = []

    for raw_header in headers:
        if not raw_header:
            continue

        # Per-header parser state, following SimpleCookie's algorithm.
        pos = 0
        header_len = len(raw_header)
        morsel: Optional[Morsel[str]] = None
        cookie_seen = False

        while 0 <= pos < header_len:
            item = _COOKIE_PATTERN.match(raw_header, pos)
            if item is None:
                # Nothing else in this header looks like a cookie item.
                break

            name = item.group("key")
            raw_value = item.group("val")
            pos = item.end(0)
            name_lower = name.lower()

            if name[0] == "$":
                # "$"-prefixed items seen before any cookie (such as
                # "$Version") pertain to the cookie mechanism as a whole and
                # are ignored; later ones are attributes of the current cookie.
                if cookie_seen and morsel is not None:
                    attr_name = name_lower[1:]
                    if attr_name in _COOKIE_KNOWN_ATTRS:
                        morsel[attr_name] = raw_value or ""
                continue

            if name_lower in _COOKIE_KNOWN_ATTRS:
                if not cookie_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if name_lower in _COOKIE_BOOL_ATTRS:
                    # A flag attribute is True whatever value it was given.
                    if morsel is None:
                        continue
                    if name_lower == "partitioned" and sys.version_info < (3, 14):
                        # Written through dict.__setitem__ on older Pythons
                        # instead of going through Morsel.__setitem__.
                        dict.__setitem__(morsel, name_lower, True)
                    else:
                        morsel[name_lower] = True
                    continue
                if raw_value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                if morsel is not None:
                    # Regular attribute with value
                    morsel[name_lower] = _unquote(raw_value)
                continue

            if raw_value is None:
                # Invalid cookie string - no value for non-attribute
                break

            # From here on the item is a cookie ``name=value`` pair.
            if name in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(name):
                internal_logger.warning(
                    "Can not load cookies: Illegal cookie name %r", name
                )
                # Attributes that follow belong to the rejected cookie and
                # must not be attached to the previous one.
                morsel = None
                continue

            morsel = Morsel()
            # The raw value (quotes included) is kept as coded_value.
            # __setstate__ is used instead of the public set() API because it
            # bypasses validation of state that has already been validated;
            # it is more stable than writing protected attributes directly
            # and unlikely to change since that would break pickling.
            morsel.__setstate__(  # type: ignore[attr-defined]
                {"key": name, "value": _unquote(raw_value), "coded_value": raw_value}
            )
            results.append((name, morsel))
            cookie_seen = True

    return results

aiohttp/abc.py

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import socket
44
from abc import ABC, abstractmethod
55
from collections.abc import Sized
6-
from http.cookies import BaseCookie, CookieError, Morsel, SimpleCookie
6+
from http.cookies import BaseCookie, Morsel
77
from typing import (
88
TYPE_CHECKING,
99
Any,
@@ -23,7 +23,7 @@
2323
from multidict import CIMultiDict
2424
from yarl import URL
2525

26-
from .log import client_logger
26+
from ._cookie_helpers import parse_cookie_headers
2727
from .typedefs import LooseCookies
2828

2929
if TYPE_CHECKING:
@@ -197,26 +197,8 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No
197197
def update_cookies_from_headers(
198198
self, headers: Sequence[str], response_url: URL
199199
) -> None:
200-
"""
201-
Update cookies from raw Set-Cookie headers.
202-
203-
Default implementation parses each header separately to preserve
204-
cookies with same name but different domain/path.
205-
"""
206-
# Default implementation for backward compatibility
207-
cookies_to_update: List[Tuple[str, Morsel[str]]] = []
208-
for cookie_header in headers:
209-
tmp_cookie = SimpleCookie()
210-
try:
211-
tmp_cookie.load(cookie_header)
212-
# Collect all cookies as tuples (name, morsel)
213-
for name, morsel in tmp_cookie.items():
214-
cookies_to_update.append((name, morsel))
215-
except CookieError as exc:
216-
client_logger.warning("Can not load response cookies: %s", exc)
217-
218-
# Update all cookies at once for efficiency
219-
if cookies_to_update:
200+
"""Update cookies from raw Set-Cookie headers."""
201+
if headers and (cookies_to_update := parse_cookie_headers(headers)):
220202
self.update_cookies(cookies_to_update, response_url)
221203

222204
@abstractmethod

aiohttp/client_reqrep.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import warnings
1010
from collections.abc import Mapping
1111
from hashlib import md5, sha1, sha256
12-
from http.cookies import CookieError, Morsel, SimpleCookie
12+
from http.cookies import Morsel, SimpleCookie
1313
from types import MappingProxyType, TracebackType
1414
from typing import (
1515
TYPE_CHECKING,
@@ -31,6 +31,7 @@
3131
from yarl import URL
3232

3333
from . import hdrs, helpers, http, multipart, payload
34+
from ._cookie_helpers import parse_cookie_headers, preserve_morsel_with_coded_value
3435
from .abc import AbstractStreamWriter
3536
from .client_exceptions import (
3637
ClientConnectionError,
@@ -62,7 +63,6 @@
6263
HttpVersion11,
6364
StreamWriter,
6465
)
65-
from .log import client_logger
6666
from .streams import StreamReader
6767
from .typedefs import (
6868
DEFAULT_JSON_DECODER,
@@ -376,11 +376,9 @@ def cookies(self) -> SimpleCookie:
376376
if self._raw_cookie_headers is not None:
377377
# Parse cookies for response.cookies (SimpleCookie for backward compatibility)
378378
cookies = SimpleCookie()
379-
for hdr in self._raw_cookie_headers:
380-
try:
381-
cookies.load(hdr)
382-
except CookieError as exc:
383-
client_logger.warning("Can not load response cookies: %s", exc)
379+
# Use parse_cookie_headers for more lenient parsing that handles
380+
# malformed cookies better than SimpleCookie.load
381+
cookies.update(parse_cookie_headers(self._raw_cookie_headers))
384382
self._cookies = cookies
385383
else:
386384
self._cookies = SimpleCookie()
@@ -1095,7 +1093,8 @@ def update_cookies(self, cookies: Optional[LooseCookies]) -> None:
10951093

10961094
c = SimpleCookie()
10971095
if hdrs.COOKIE in self.headers:
1098-
c.load(self.headers.get(hdrs.COOKIE, ""))
1096+
# parse_cookie_headers already preserves coded values
1097+
c.update(parse_cookie_headers((self.headers.get(hdrs.COOKIE, ""),)))
10991098
del self.headers[hdrs.COOKIE]
11001099

11011100
if isinstance(cookies, Mapping):
@@ -1104,10 +1103,8 @@ def update_cookies(self, cookies: Optional[LooseCookies]) -> None:
11041103
iter_cookies = cookies # type: ignore[assignment]
11051104
for name, value in iter_cookies:
11061105
if isinstance(value, Morsel):
1107-
# Preserve coded_value
1108-
mrsl_val = value.get(value.key, Morsel())
1109-
mrsl_val.set(value.key, value.value, value.coded_value)
1110-
c[name] = mrsl_val
1106+
# Use helper to preserve coded_value exactly as sent by server
1107+
c[name] = preserve_morsel_with_coded_value(value)
11111108
else:
11121109
c[name] = value # type: ignore[assignment]
11131110

0 commit comments

Comments
 (0)