Skip to content

Commit e8d774f

Browse files
Add trailer parsing logic (#11269) (#11287)
(cherry picked from commit 7dd4b55)
1 parent 0389371 commit e8d774f

File tree

4 files changed

+100
-121
lines changed

4 files changed

+100
-121
lines changed

CHANGES/11269.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added initial trailer parsing logic to the Python HTTP parser -- by :user:`Dreamsorcerer`.

aiohttp/http_parser.py

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,8 @@ def parse_headers(
142142
# note: "raw" does not mean inclusion of OWS before/after the field value
143143
raw_headers = []
144144

145-
lines_idx = 1
146-
line = lines[1]
145+
lines_idx = 0
146+
line = lines[lines_idx]
147147
line_count = len(lines)
148148

149149
while line:
@@ -400,6 +400,7 @@ def get_content_length() -> Optional[int]:
400400
response_with_body=self.response_with_body,
401401
auto_decompress=self._auto_decompress,
402402
lax=self.lax,
403+
headers_parser=self._headers_parser,
403404
)
404405
if not payload_parser.done:
405406
self._payload_parser = payload_parser
@@ -418,6 +419,7 @@ def get_content_length() -> Optional[int]:
418419
compression=msg.compression,
419420
auto_decompress=self._auto_decompress,
420421
lax=self.lax,
422+
headers_parser=self._headers_parser,
421423
)
422424
elif not empty_body and length is None and self.read_until_eof:
423425
payload = StreamReader(
@@ -436,6 +438,7 @@ def get_content_length() -> Optional[int]:
436438
response_with_body=self.response_with_body,
437439
auto_decompress=self._auto_decompress,
438440
lax=self.lax,
441+
headers_parser=self._headers_parser,
439442
)
440443
if not payload_parser.done:
441444
self._payload_parser = payload_parser
@@ -473,6 +476,10 @@ def get_content_length() -> Optional[int]:
473476

474477
eof = True
475478
data = b""
479+
if isinstance(
480+
underlying_exc, (InvalidHeader, TransferEncodingError)
481+
):
482+
raise
476483

477484
if eof:
478485
start_pos = 0
@@ -635,7 +642,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
635642
compression,
636643
upgrade,
637644
chunked,
638-
) = self.parse_headers(lines)
645+
) = self.parse_headers(lines[1:])
639646

640647
if close is None: # then the headers weren't set in the request
641648
if version_o <= HttpVersion10: # HTTP 1.0 must ask to not close
@@ -721,7 +728,7 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
721728
compression,
722729
upgrade,
723730
chunked,
724-
) = self.parse_headers(lines)
731+
) = self.parse_headers(lines[1:])
725732

726733
if close is None:
727734
if version_o <= HttpVersion10:
@@ -764,6 +771,8 @@ def __init__(
764771
response_with_body: bool = True,
765772
auto_decompress: bool = True,
766773
lax: bool = False,
774+
*,
775+
headers_parser: HeadersParser,
767776
) -> None:
768777
self._length = 0
769778
self._type = ParseState.PARSE_UNTIL_EOF
@@ -772,6 +781,8 @@ def __init__(
772781
self._chunk_tail = b""
773782
self._auto_decompress = auto_decompress
774783
self._lax = lax
784+
self._headers_parser = headers_parser
785+
self._trailer_lines: list[bytes] = []
775786
self.done = False
776787

777788
# payload decompression wrapper
@@ -848,7 +859,7 @@ def feed_data(
848859
size_b = chunk[:i] # strip chunk-extensions
849860
# Verify no LF in the chunk-extension
850861
if b"\n" in (ext := chunk[i:pos]):
851-
exc = BadHttpMessage(
862+
exc = TransferEncodingError(
852863
f"Unexpected LF in chunk-extension: {ext!r}"
853864
)
854865
set_exception(self.payload, exc)
@@ -869,7 +880,7 @@ def feed_data(
869880

870881
chunk = chunk[pos + len(SEP) :]
871882
if size == 0: # eof marker
872-
self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
883+
self._chunk = ChunkState.PARSE_TRAILERS
873884
if self._lax and chunk.startswith(b"\r"):
874885
chunk = chunk[1:]
875886
else:
@@ -907,38 +918,31 @@ def feed_data(
907918
self._chunk_tail = chunk
908919
return False, b""
909920

910-
# if stream does not contain trailer, after 0\r\n
911-
# we should get another \r\n otherwise
912-
# trailers needs to be skipped until \r\n\r\n
913-
if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
914-
head = chunk[: len(SEP)]
915-
if head == SEP:
916-
# end of stream
917-
self.payload.feed_eof()
918-
return True, chunk[len(SEP) :]
919-
# Both CR and LF, or only LF may not be received yet. It is
920-
# expected that CRLF or LF will be shown at the very first
921-
# byte next time, otherwise trailers should come. The last
922-
# CRLF which marks the end of response might not be
923-
# contained in the same TCP segment which delivered the
924-
# size indicator.
925-
if not head:
926-
return False, b""
927-
if head == SEP[:1]:
928-
self._chunk_tail = head
929-
return False, b""
930-
self._chunk = ChunkState.PARSE_TRAILERS
931-
932-
# read and discard trailer up to the CRLF terminator
933921
if self._chunk == ChunkState.PARSE_TRAILERS:
934922
pos = chunk.find(SEP)
935-
if pos >= 0:
936-
chunk = chunk[pos + len(SEP) :]
937-
self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
938-
else:
923+
if pos < 0: # No line found
939924
self._chunk_tail = chunk
940925
return False, b""
941926

927+
line = chunk[:pos]
928+
chunk = chunk[pos + len(SEP) :]
929+
if SEP == b"\n": # For lax response parsing
930+
line = line.rstrip(b"\r")
931+
self._trailer_lines.append(line)
932+
933+
# \r\n\r\n found, end of stream
934+
if self._trailer_lines[-1] == b"":
935+
# Headers and trailers are defined the same way,
936+
# so we reuse the HeadersParser here.
937+
try:
938+
trailers, raw_trailers = self._headers_parser.parse_headers(
939+
self._trailer_lines
940+
)
941+
finally:
942+
self._trailer_lines.clear()
943+
self.payload.feed_eof()
944+
return True, chunk
945+
942946
# Read all bytes until eof
943947
elif self._type == ParseState.PARSE_UNTIL_EOF:
944948
self.payload.feed_data(chunk, len(chunk))

aiohttp/multipart.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -777,7 +777,7 @@ async def _read_boundary(self) -> None:
777777
raise ValueError(f"Invalid boundary {chunk!r}, expected {self._boundary!r}")
778778

779779
async def _read_headers(self) -> "CIMultiDictProxy[str]":
780-
lines = [b""]
780+
lines = []
781781
while True:
782782
chunk = await self._content.readline()
783783
chunk = chunk.strip()

0 commit comments

Comments
 (0)