|
| 1 | +""" |
| 2 | +Internal cookie handling helpers. |
| 3 | +
|
| 4 | +This module contains internal utilities for cookie parsing and manipulation. |
| 5 | +These are not part of the public API and may change without notice. |
| 6 | +""" |
| 7 | + |
| 8 | +import re |
| 9 | +import sys |
| 10 | +from http.cookies import Morsel |
| 11 | +from typing import List, Optional, Sequence, Tuple, cast |
| 12 | + |
| 13 | +from .log import internal_logger |
| 14 | + |
| 15 | +__all__ = ("parse_cookie_headers", "preserve_morsel_with_coded_value") |
| 16 | + |
# Cookie parsing constants
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683).
# RFC 6265 defines cookie-name as a token per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# This makes the cookie parser more tolerant of real-world cookies
# while still providing some validation to catch obviously malformed names.
# The pattern is anchored with \Z rather than $ because $ also matches just
# before a trailing newline, which would let a name such as "sid\n" through.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+\Z")
# Lower-case attribute names treated as cookie attributes rather than as
# cookie names (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "comment",
        "domain",
        "expires",
        "httponly",
        "max-age",
        "partitioned",
        "path",
        "samesite",
        "secure",
        "version",
    }
)
# Subset of the known attributes that are flags (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"httponly", "partitioned", "secure"})
| 42 | + |
# SimpleCookie's pattern for parsing cookies with relaxed validation.
# Based on the http.cookies pattern but extended to allow more characters in
# cookie names to handle real-world cookies (fixes #2683).
#
# Each match consumes one ``key`` or ``key=value`` item plus its terminator
# (whitespace, a semicolon, or end of string). The named group ``key`` is
# always set; the named group ``val`` is None when the item has no ``=``.
# ``val`` is tried, in order, as: a properly closed double-quoted string, a
# string with an unmatched opening quote, an "expires"-style date with a
# weekday and comma, an asctime()-style date, and finally a bare word (which
# may be empty).
#
# The pattern is compiled with re.VERBOSE, so the whitespace and "#" comments
# inside the literal are not part of the expression, and with re.ASCII, so
# \w, \d and \s only match ASCII characters.
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+?   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any double-quoted string (properly closed)
    |                                  # or
    "[^";]*                            # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    |                                  # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s              # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s        # Date and time in specific format
    (GMT|[+-]\d{4})                     # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                        # NOTE: RFC 2822 timezone support is an aiohttp extension
                                        # for issue #4493 - SimpleCookie does NOT support this
    |                                  # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    |                                  # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
| 79 | + |
| 80 | + |
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only the key, value and coded_value are carried over; attributes set on
    the input Morsel (path, domain, ...) are not copied to the result.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. A Morsel's mapping holds cookie
    # attributes, not cookies, so looking the cookie name up in ``cookie``
    # can only miss or return an attribute value (a str/bool), which has no
    # __setstate__ and would crash below.
    preserved: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    preserved.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return preserved
| 109 | + |
| 110 | + |
# Matches one backslash escape inside a quoted cookie value: either a
# three-digit octal code (\000-\377) or a backslash followed by any single
# character other than a newline.
_UNQUOTE_ESCAPE_RE = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")


def _unquote_replace(match: "re.Match[str]") -> str:
    """Return the character denoted by one backslash escape sequence."""
    octal = match.group(1)
    if octal:
        return chr(int(octal, 8))
    return match.group(2)


def _unquote(text: str) -> str:
    """
    Unquote a cookie value.

    Mirrors http.cookies._unquote: a value wrapped in double quotes has the
    quotes removed, three-digit octal escapes (e.g. ``\\073`` for ``;``)
    decoded, and any other backslash-escaped character replaced by the
    character itself. Values that are not wrapped in double quotes are
    returned unchanged.

    Args:
        text: The raw cookie value as it appeared in the header.

    Returns:
        The decoded value.
    """
    # Anything shorter than two characters cannot be a quoted string, and a
    # value missing either quote is returned as-is.
    if len(text) < 2 or text[0] != '"' or text[-1] != '"':
        return text
    # Decode all escapes in a single left-to-right pass so that an escaped
    # backslash is never re-interpreted as the start of another escape.
    return _UNQUOTE_ESCAPE_RE.sub(_unquote_replace, text[1:-1])
| 125 | + |
| 126 | + |
def _set_morsel_attr(morsel: Morsel[str], name: str, value: object) -> None:
    """Store attribute *name* on *morsel*, tolerating "partitioned" on Python < 3.14."""
    if name == "partitioned" and sys.version_info < (3, 14):
        # Before Python 3.14, Morsel.__setitem__ rejects "partitioned" with
        # CookieError, so write the key through dict directly.
        dict.__setitem__(morsel, name, value)
    else:
        morsel[name] = value


def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Raw header values; empty entries are skipped.

    Returns:
        A list of ``(name, morsel)`` pairs in the order the cookies were
        found. Each morsel keeps the value as received in ``coded_value`` and
        the unquoted value in ``value``. Parsing of a header stops at the
        first item that cannot be interpreted; cookies found before that
        point are kept.
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that subsequent attributes attach to; None after a cookie
        # with an illegal name so that its attributes are discarded.
        current_morsel: Optional[Morsel[str]] = None
        morsel_seen = False

        while i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute of the current cookie
                attr_lower_key = lower_key[1:]
                if (
                    current_morsel is not None
                    and attr_lower_key in _COOKIE_KNOWN_ATTRS
                ):
                    # Goes through the helper so that "$Partitioned" does not
                    # raise CookieError on Python < 3.14.
                    _set_morsel_attr(current_morsel, attr_lower_key, value or "")
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None:
                        _set_morsel_attr(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair.
                # Names equal to a known attribute never reach this branch
                # (they are handled above), so only the character set of the
                # name needs validating here.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies
0 commit comments