60
60
61
61
ASCIISET : Final [Set [str ]] = set (string .printable )
62
62
63
- # See https://tools.ietf.org/html/rfc7230#section-3.1.1
64
- # and https://tools.ietf.org/html/rfc7230#appendix-B
63
+ # See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
64
+ # and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
65
65
#
66
66
# method = token
67
67
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
68
68
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
69
69
# token = 1*tchar
70
70
METHRE : Final [Pattern [str ]] = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+" )
71
- VERSRE : Final [Pattern [str ]] = re .compile (r"HTTP/(\d+ ).(\d+ )" )
72
- HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\" ]" )
71
+ VERSRE : Final [Pattern [str ]] = re .compile (r"HTTP/(\d).(\d)" )
72
+ HDRRE : Final [Pattern [bytes ]] = re .compile (rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\ ]" )
73
73
74
74
75
75
class RawRequestMessage (NamedTuple ):
@@ -148,8 +148,11 @@ def parse_headers(
148
148
except ValueError :
149
149
raise InvalidHeader (line ) from None
150
150
151
- bname = bname .strip (b" \t " )
152
- bvalue = bvalue .lstrip ()
151
+ # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
152
+ if {bname [0 ], bname [- 1 ]} & {32 , 9 }: # {" ", "\t"}
153
+ raise InvalidHeader (line )
154
+
155
+ bvalue = bvalue .lstrip (b" \t " )
153
156
if HDRRE .search (bname ):
154
157
raise InvalidHeader (bname )
155
158
if len (bname ) > self .max_field_size :
@@ -170,6 +173,7 @@ def parse_headers(
170
173
# consume continuation lines
171
174
continuation = line and line [0 ] in (32 , 9 ) # (' ', '\t')
172
175
176
+ # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
173
177
if continuation :
174
178
bvalue_lst = [bvalue ]
175
179
while continuation :
@@ -204,10 +208,14 @@ def parse_headers(
204
208
str (header_length ),
205
209
)
206
210
207
- bvalue = bvalue .strip ()
211
+ bvalue = bvalue .strip (b" \t " )
208
212
name = bname .decode ("utf-8" , "surrogateescape" )
209
213
value = bvalue .decode ("utf-8" , "surrogateescape" )
210
214
215
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
216
+ if "\n " in value or "\r " in value or "\x00 " in value :
217
+ raise InvalidHeader (bvalue )
218
+
211
219
headers .add (name , value )
212
220
raw_headers .append ((bname , bvalue ))
213
221
@@ -322,15 +330,12 @@ def get_content_length() -> Optional[int]:
322
330
if length_hdr is None :
323
331
return None
324
332
325
- try :
326
- length = int ( length_hdr )
327
- except ValueError :
333
+ # Shouldn't allow +/- or other number formats.
334
+ # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
335
+ if not length_hdr . strip ( " \t " ). isdigit () :
328
336
raise InvalidHeader (CONTENT_LENGTH )
329
337
330
- if length < 0 :
331
- raise InvalidHeader (CONTENT_LENGTH )
332
-
333
- return length
338
+ return int (length_hdr )
334
339
335
340
length = get_content_length ()
336
341
# do not support old websocket spec
@@ -470,6 +475,24 @@ def parse_headers(
470
475
upgrade = False
471
476
chunked = False
472
477
478
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
479
+ # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
480
+ singletons = (
481
+ hdrs .CONTENT_LENGTH ,
482
+ hdrs .CONTENT_LOCATION ,
483
+ hdrs .CONTENT_RANGE ,
484
+ hdrs .CONTENT_TYPE ,
485
+ hdrs .ETAG ,
486
+ hdrs .HOST ,
487
+ hdrs .MAX_FORWARDS ,
488
+ hdrs .SERVER ,
489
+ hdrs .TRANSFER_ENCODING ,
490
+ hdrs .USER_AGENT ,
491
+ )
492
+ bad_hdr = next ((h for h in singletons if len (headers .getall (h , ())) > 1 ), None )
493
+ if bad_hdr is not None :
494
+ raise BadHttpMessage (f"Duplicate '{ bad_hdr } ' header found." )
495
+
473
496
# keep-alive
474
497
conn = headers .get (hdrs .CONNECTION )
475
498
if conn :
@@ -523,7 +546,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
523
546
# request line
524
547
line = lines [0 ].decode ("utf-8" , "surrogateescape" )
525
548
try :
526
- method , path , version = line .split (None , 2 )
549
+ method , path , version = line .split (maxsplit = 2 )
527
550
except ValueError :
528
551
raise BadStatusLine (line ) from None
529
552
@@ -537,14 +560,10 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
537
560
raise BadStatusLine (method )
538
561
539
562
# version
540
- try :
541
- if version .startswith ("HTTP/" ):
542
- n1 , n2 = version [5 :].split ("." , 1 )
543
- version_o = HttpVersion (int (n1 ), int (n2 ))
544
- else :
545
- raise BadStatusLine (version )
546
- except Exception :
547
- raise BadStatusLine (version )
563
+ match = VERSRE .match (version )
564
+ if match is None :
565
+ raise BadStatusLine (line )
566
+ version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
548
567
549
568
if method == "CONNECT" :
550
569
# authority-form,
@@ -611,12 +630,12 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
611
630
def parse_message (self , lines : List [bytes ]) -> RawResponseMessage :
612
631
line = lines [0 ].decode ("utf-8" , "surrogateescape" )
613
632
try :
614
- version , status = line .split (None , 1 )
633
+ version , status = line .split (maxsplit = 1 )
615
634
except ValueError :
616
635
raise BadStatusLine (line ) from None
617
636
618
637
try :
619
- status , reason = status .split (None , 1 )
638
+ status , reason = status .split (maxsplit = 1 )
620
639
except ValueError :
621
640
reason = ""
622
641
@@ -632,13 +651,9 @@ def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
632
651
version_o = HttpVersion (int (match .group (1 )), int (match .group (2 )))
633
652
634
653
# The status code is a three-digit number
635
- try :
636
- status_i = int (status )
637
- except ValueError :
638
- raise BadStatusLine (line ) from None
639
-
640
- if status_i > 999 :
654
+ if len (status ) != 3 or not status .isdigit ():
641
655
raise BadStatusLine (line )
656
+ status_i = int (status )
642
657
643
658
# read headers
644
659
(
@@ -773,14 +788,13 @@ def feed_data(
773
788
else :
774
789
size_b = chunk [:pos ]
775
790
776
- try :
777
- size = int (bytes (size_b ), 16 )
778
- except ValueError :
791
+ if not size_b .isdigit ():
779
792
exc = TransferEncodingError (
780
793
chunk [:pos ].decode ("ascii" , "surrogateescape" )
781
794
)
782
795
self .payload .set_exception (exc )
783
- raise exc from None
796
+ raise exc
797
+ size = int (bytes (size_b ), 16 )
784
798
785
799
chunk = chunk [pos + 2 :]
786
800
if size == 0 : # eof marker
0 commit comments