@@ -142,8 +142,8 @@ def parse_headers(
142
142
# note: "raw" does not mean inclusion of OWS before/after the field value
143
143
raw_headers = []
144
144
145
- lines_idx = 1
146
- line = lines [1 ]
145
+ lines_idx = 0
146
+ line = lines [lines_idx ]
147
147
line_count = len (lines )
148
148
149
149
while line :
@@ -400,6 +400,7 @@ def get_content_length() -> Optional[int]:
400
400
response_with_body = self .response_with_body ,
401
401
auto_decompress = self ._auto_decompress ,
402
402
lax = self .lax ,
403
+ headers_parser = self ._headers_parser ,
403
404
)
404
405
if not payload_parser .done :
405
406
self ._payload_parser = payload_parser
@@ -418,6 +419,7 @@ def get_content_length() -> Optional[int]:
418
419
compression = msg .compression ,
419
420
auto_decompress = self ._auto_decompress ,
420
421
lax = self .lax ,
422
+ headers_parser = self ._headers_parser ,
421
423
)
422
424
elif not empty_body and length is None and self .read_until_eof :
423
425
payload = StreamReader (
@@ -436,6 +438,7 @@ def get_content_length() -> Optional[int]:
436
438
response_with_body = self .response_with_body ,
437
439
auto_decompress = self ._auto_decompress ,
438
440
lax = self .lax ,
441
+ headers_parser = self ._headers_parser ,
439
442
)
440
443
if not payload_parser .done :
441
444
self ._payload_parser = payload_parser
@@ -473,6 +476,10 @@ def get_content_length() -> Optional[int]:
473
476
474
477
eof = True
475
478
data = b""
479
+ if isinstance (
480
+ underlying_exc , (InvalidHeader , TransferEncodingError )
481
+ ):
482
+ raise
476
483
477
484
if eof :
478
485
start_pos = 0
@@ -635,7 +642,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
635
642
compression ,
636
643
upgrade ,
637
644
chunked ,
638
- ) = self .parse_headers (lines )
645
+ ) = self .parse_headers (lines [ 1 :] )
639
646
640
647
if close is None : # then the headers weren't set in the request
641
648
if version_o <= HttpVersion10 : # HTTP 1.0 must ask to not close
@@ -721,7 +728,7 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
721
728
compression ,
722
729
upgrade ,
723
730
chunked ,
724
- ) = self .parse_headers (lines )
731
+ ) = self .parse_headers (lines [ 1 :] )
725
732
726
733
if close is None :
727
734
if version_o <= HttpVersion10 :
@@ -764,6 +771,8 @@ def __init__(
764
771
response_with_body : bool = True ,
765
772
auto_decompress : bool = True ,
766
773
lax : bool = False ,
774
+ * ,
775
+ headers_parser : HeadersParser ,
767
776
) -> None :
768
777
self ._length = 0
769
778
self ._type = ParseState .PARSE_UNTIL_EOF
@@ -772,6 +781,8 @@ def __init__(
772
781
self ._chunk_tail = b""
773
782
self ._auto_decompress = auto_decompress
774
783
self ._lax = lax
784
+ self ._headers_parser = headers_parser
785
+ self ._trailer_lines : list [bytes ] = []
775
786
self .done = False
776
787
777
788
# payload decompression wrapper
@@ -848,7 +859,7 @@ def feed_data(
848
859
size_b = chunk [:i ] # strip chunk-extensions
849
860
# Verify no LF in the chunk-extension
850
861
if b"\n " in (ext := chunk [i :pos ]):
851
- exc = BadHttpMessage (
862
+ exc = TransferEncodingError (
852
863
f"Unexpected LF in chunk-extension: { ext !r} "
853
864
)
854
865
set_exception (self .payload , exc )
@@ -869,7 +880,7 @@ def feed_data(
869
880
870
881
chunk = chunk [pos + len (SEP ) :]
871
882
if size == 0 : # eof marker
872
- self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
883
+ self ._chunk = ChunkState .PARSE_TRAILERS
873
884
if self ._lax and chunk .startswith (b"\r " ):
874
885
chunk = chunk [1 :]
875
886
else :
@@ -907,38 +918,31 @@ def feed_data(
907
918
self ._chunk_tail = chunk
908
919
return False , b""
909
920
910
- # if stream does not contain trailer, after 0\r\n
911
- # we should get another \r\n otherwise
912
- # trailers needs to be skipped until \r\n\r\n
913
- if self ._chunk == ChunkState .PARSE_MAYBE_TRAILERS :
914
- head = chunk [: len (SEP )]
915
- if head == SEP :
916
- # end of stream
917
- self .payload .feed_eof ()
918
- return True , chunk [len (SEP ) :]
919
- # Both CR and LF, or only LF may not be received yet. It is
920
- # expected that CRLF or LF will be shown at the very first
921
- # byte next time, otherwise trailers should come. The last
922
- # CRLF which marks the end of response might not be
923
- # contained in the same TCP segment which delivered the
924
- # size indicator.
925
- if not head :
926
- return False , b""
927
- if head == SEP [:1 ]:
928
- self ._chunk_tail = head
929
- return False , b""
930
- self ._chunk = ChunkState .PARSE_TRAILERS
931
-
932
- # read and discard trailer up to the CRLF terminator
933
921
if self ._chunk == ChunkState .PARSE_TRAILERS :
934
922
pos = chunk .find (SEP )
935
- if pos >= 0 :
936
- chunk = chunk [pos + len (SEP ) :]
937
- self ._chunk = ChunkState .PARSE_MAYBE_TRAILERS
938
- else :
923
+ if pos < 0 : # No line found
939
924
self ._chunk_tail = chunk
940
925
return False , b""
941
926
927
+ line = chunk [:pos ]
928
+ chunk = chunk [pos + len (SEP ) :]
929
+ if SEP == b"\n " : # For lax response parsing
930
+ line = line .rstrip (b"\r " )
931
+ self ._trailer_lines .append (line )
932
+
933
+ # \r\n\r\n found, end of stream
934
+ if self ._trailer_lines [- 1 ] == b"" :
935
+ # Headers and trailers are defined the same way,
936
+ # so we reuse the HeadersParser here.
937
+ try :
938
+ trailers , raw_trailers = self ._headers_parser .parse_headers (
939
+ self ._trailer_lines
940
+ )
941
+ finally :
942
+ self ._trailer_lines .clear ()
943
+ self .payload .feed_eof ()
944
+ return True , chunk
945
+
942
946
# Read all bytes until eof
943
947
elif self ._type == ParseState .PARSE_UNTIL_EOF :
944
948
self .payload .feed_data (chunk , len (chunk ))
0 commit comments