Skip to content

Commit d5c12ba

Browse files
[PR #7661/85713a48 backport][3.8] Update Python parser for RFCs 9110/9112 (#7662)
**This is a backport of PR #7661 as merged into 3.9 (85713a4).** None Co-authored-by: Sam Bull <[email protected]>
1 parent 8a3977a commit d5c12ba

File tree

2 files changed

+131
-40
lines changed

2 files changed

+131
-40
lines changed

aiohttp/http_parser.py

Lines changed: 49 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,16 @@
6060

6161
ASCIISET: Final[Set[str]] = set(string.printable)
6262

63-
# See https://tools.ietf.org/html/rfc7230#section-3.1.1
64-
# and https://tools.ietf.org/html/rfc7230#appendix-B
63+
# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
64+
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
6565
#
6666
# method = token
6767
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
6868
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
6969
# token = 1*tchar
7070
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
71-
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+).(\d+)")
72-
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")
71+
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
72+
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
7373

7474

7575
class RawRequestMessage(NamedTuple):
@@ -148,8 +148,11 @@ def parse_headers(
148148
except ValueError:
149149
raise InvalidHeader(line) from None
150150

151-
bname = bname.strip(b" \t")
152-
bvalue = bvalue.lstrip()
151+
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
152+
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
153+
raise InvalidHeader(line)
154+
155+
bvalue = bvalue.lstrip(b" \t")
153156
if HDRRE.search(bname):
154157
raise InvalidHeader(bname)
155158
if len(bname) > self.max_field_size:
@@ -170,6 +173,7 @@ def parse_headers(
170173
# consume continuation lines
171174
continuation = line and line[0] in (32, 9) # (' ', '\t')
172175

176+
# Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
173177
if continuation:
174178
bvalue_lst = [bvalue]
175179
while continuation:
@@ -204,10 +208,14 @@ def parse_headers(
204208
str(header_length),
205209
)
206210

207-
bvalue = bvalue.strip()
211+
bvalue = bvalue.strip(b" \t")
208212
name = bname.decode("utf-8", "surrogateescape")
209213
value = bvalue.decode("utf-8", "surrogateescape")
210214

215+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
216+
if "\n" in value or "\r" in value or "\x00" in value:
217+
raise InvalidHeader(bvalue)
218+
211219
headers.add(name, value)
212220
raw_headers.append((bname, bvalue))
213221

@@ -322,15 +330,12 @@ def get_content_length() -> Optional[int]:
322330
if length_hdr is None:
323331
return None
324332

325-
try:
326-
length = int(length_hdr)
327-
except ValueError:
333+
# Shouldn't allow +/- or other number formats.
334+
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
335+
if not length_hdr.strip(" \t").isdigit():
328336
raise InvalidHeader(CONTENT_LENGTH)
329337

330-
if length < 0:
331-
raise InvalidHeader(CONTENT_LENGTH)
332-
333-
return length
338+
return int(length_hdr)
334339

335340
length = get_content_length()
336341
# do not support old websocket spec
@@ -470,6 +475,24 @@ def parse_headers(
470475
upgrade = False
471476
chunked = False
472477

478+
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
479+
# https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
480+
singletons = (
481+
hdrs.CONTENT_LENGTH,
482+
hdrs.CONTENT_LOCATION,
483+
hdrs.CONTENT_RANGE,
484+
hdrs.CONTENT_TYPE,
485+
hdrs.ETAG,
486+
hdrs.HOST,
487+
hdrs.MAX_FORWARDS,
488+
hdrs.SERVER,
489+
hdrs.TRANSFER_ENCODING,
490+
hdrs.USER_AGENT,
491+
)
492+
bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
493+
if bad_hdr is not None:
494+
raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
495+
473496
# keep-alive
474497
conn = headers.get(hdrs.CONNECTION)
475498
if conn:
@@ -523,7 +546,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
523546
# request line
524547
line = lines[0].decode("utf-8", "surrogateescape")
525548
try:
526-
method, path, version = line.split(None, 2)
549+
method, path, version = line.split(maxsplit=2)
527550
except ValueError:
528551
raise BadStatusLine(line) from None
529552

@@ -537,14 +560,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
537560
raise BadStatusLine(method)
538561

539562
# version
540-
try:
541-
if version.startswith("HTTP/"):
542-
n1, n2 = version[5:].split(".", 1)
543-
version_o = HttpVersion(int(n1), int(n2))
544-
else:
545-
raise BadStatusLine(version)
546-
except Exception:
547-
raise BadStatusLine(version)
563+
match = VERSRE.match(version)
564+
if match is None:
565+
raise BadStatusLine(line)
566+
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
548567

549568
if method == "CONNECT":
550569
# authority-form,
@@ -611,12 +630,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
611630
def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
612631
line = lines[0].decode("utf-8", "surrogateescape")
613632
try:
614-
version, status = line.split(None, 1)
633+
version, status = line.split(maxsplit=1)
615634
except ValueError:
616635
raise BadStatusLine(line) from None
617636

618637
try:
619-
status, reason = status.split(None, 1)
638+
status, reason = status.split(maxsplit=1)
620639
except ValueError:
621640
reason = ""
622641

@@ -632,13 +651,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
632651
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
633652

634653
# The status code is a three-digit number
635-
try:
636-
status_i = int(status)
637-
except ValueError:
638-
raise BadStatusLine(line) from None
639-
640-
if status_i > 999:
654+
if len(status) != 3 or not status.isdigit():
641655
raise BadStatusLine(line)
656+
status_i = int(status)
642657

643658
# read headers
644659
(
@@ -773,14 +788,13 @@ def feed_data(
773788
else:
774789
size_b = chunk[:pos]
775790

776-
try:
777-
size = int(bytes(size_b), 16)
778-
except ValueError:
791+
if not size_b.isdigit():
779792
exc = TransferEncodingError(
780793
chunk[:pos].decode("ascii", "surrogateescape")
781794
)
782795
self.payload.set_exception(exc)
783-
raise exc from None
796+
raise exc
797+
size = int(bytes(size_b), 16)
784798

785799
chunk = chunk[pos + 2 :]
786800
if size == 0: # eof marker

tests/test_http_parser.py

Lines changed: 82 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,74 @@ def test_invalid_name(parser) -> None:
465465
parser.feed_data(text)
466466

467467

468+
def test_cve_2023_37276(parser: Any) -> None:
469+
text = b"""POST / HTTP/1.1\r\nHost: localhost:8080\r\nX-Abc: \rxTransfer-Encoding: chunked\r\n\r\n"""
470+
with pytest.raises(http_exceptions.BadHttpMessage):
471+
parser.feed_data(text)
472+
473+
474+
@pytest.mark.parametrize(
475+
"hdr",
476+
(
477+
"Content-Length: -5", # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
478+
"Content-Length: +256",
479+
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
480+
"Bar: abc\ndef",
481+
"Baz: abc\x00def",
482+
"Foo : bar", # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
483+
"Foo\t: bar",
484+
),
485+
)
486+
def test_bad_headers(parser: Any, hdr: str) -> None:
487+
text = f"POST / HTTP/1.1\r\n{hdr}\r\n\r\n".encode()
488+
with pytest.raises(http_exceptions.InvalidHeader):
489+
parser.feed_data(text)
490+
491+
492+
def test_bad_chunked_py(loop: Any, protocol: Any) -> None:
493+
"""Test that invalid chunked encoding doesn't allow content-length to be used."""
494+
parser = HttpRequestParserPy(
495+
protocol,
496+
loop,
497+
2**16,
498+
max_line_size=8190,
499+
max_field_size=8190,
500+
)
501+
text = (
502+
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
503+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
504+
)
505+
messages, upgrade, tail = parser.feed_data(text)
506+
assert isinstance(messages[0][1].exception(), http_exceptions.TransferEncodingError)
507+
508+
509+
@pytest.mark.skipif(
510+
"HttpRequestParserC" not in dir(aiohttp.http_parser),
511+
reason="C based HTTP parser not available",
512+
)
513+
def test_bad_chunked_c(loop: Any, protocol: Any) -> None:
514+
"""C parser behaves differently. Maybe we should align them later."""
515+
parser = HttpRequestParserC(
516+
protocol,
517+
loop,
518+
2**16,
519+
max_line_size=8190,
520+
max_field_size=8190,
521+
)
522+
text = (
523+
b"GET / HTTP/1.1\r\nHost: a\r\nTransfer-Encoding: chunked\r\n\r\n0_2e\r\n\r\n"
524+
+ b"GET / HTTP/1.1\r\nHost: a\r\nContent-Length: 5\r\n\r\n0\r\n\r\n"
525+
)
526+
with pytest.raises(http_exceptions.BadHttpMessage):
527+
parser.feed_data(text)
528+
529+
530+
def test_whitespace_before_header(parser: Any) -> None:
531+
text = b"GET / HTTP/1.1\r\n\tContent-Length: 1\r\n\r\nX"
532+
with pytest.raises(http_exceptions.BadHttpMessage):
533+
parser.feed_data(text)
534+
535+
468536
@pytest.mark.parametrize("size", [40960, 8191])
469537
def test_max_header_field_size(parser, size) -> None:
470538
name = b"t" * size
@@ -646,6 +714,11 @@ def test_http_request_parser_bad_version(parser) -> None:
646714
parser.feed_data(b"GET //get HT/11\r\n\r\n")
647715

648716

717+
def test_http_request_parser_bad_version_number(parser: Any) -> None:
718+
with pytest.raises(http_exceptions.BadHttpMessage):
719+
parser.feed_data(b"GET /test HTTP/12.3\r\n\r\n")
720+
721+
649722
@pytest.mark.parametrize("size", [40965, 8191])
650723
def test_http_request_max_status_line(parser, size) -> None:
651724
path = b"t" * (size - 5)
@@ -713,6 +786,11 @@ def test_http_response_parser_bad_version(response) -> None:
713786
response.feed_data(b"HT/11 200 Ok\r\n\r\n")
714787

715788

789+
def test_http_response_parser_bad_version_number(response) -> None:
790+
with pytest.raises(http_exceptions.BadHttpMessage):
791+
response.feed_data(b"HTTP/12.3 200 Ok\r\n\r\n")
792+
793+
716794
def test_http_response_parser_no_reason(response) -> None:
717795
msg = response.feed_data(b"HTTP/1.1 200\r\n\r\n")[0][0][0]
718796

@@ -743,19 +821,18 @@ def test_http_response_parser_bad(response) -> None:
743821
response.feed_data(b"HTT/1\r\n\r\n")
744822

745823

746-
@pytest.mark.skipif(not NO_EXTENSIONS, reason="Behaviour has changed in C parser")
747824
def test_http_response_parser_code_under_100(response) -> None:
748-
msg = response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")[0][0][0]
749-
assert msg.code == 99
825+
with pytest.raises(http_exceptions.BadStatusLine):
826+
response.feed_data(b"HTTP/1.1 99 test\r\n\r\n")
750827

751828

752829
def test_http_response_parser_code_above_999(response) -> None:
753-
with pytest.raises(http_exceptions.BadHttpMessage):
830+
with pytest.raises(http_exceptions.BadStatusLine):
754831
response.feed_data(b"HTTP/1.1 9999 test\r\n\r\n")
755832

756833

757834
def test_http_response_parser_code_not_int(response) -> None:
758-
with pytest.raises(http_exceptions.BadHttpMessage):
835+
with pytest.raises(http_exceptions.BadStatusLine):
759836
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
760837

761838

0 commit comments

Comments
 (0)