7171# To be used with str.strip() and related methods.
7272HTTP_WHITESPACE = " \t "
7373
74- HTTP_TOKEN_RE = re .compile (r"^[!#$%&'*+\-.^_`|~0-9A-Za-z]+$" )
74+
75+ class _ABNF :
76+ """Class that holds a subset of ABNF rules from RFC 9110 and friends.
77+
78+ Class attributes are re.Pattern objects, with the same name as in the RFC
79+ (with hyphens changed to underscores). Currently contains only the subset
80+ we use (which is why this class is not public). Unfortunately the fields
81+ cannot be alphabetized as they are in the RFCs because of dependencies.
82+ """
83+
84+ # RFC 5234 (ABNF)
85+ VCHAR = re .compile (r"[\x21-\x7E]" )
86+
87+ # RFC 9110 (HTTP Semantics)
88+ obs_text = re .compile (r"[\x80-\xFF]" )
89+ field_vchar = re .compile (rf"(?:{ VCHAR .pattern } |{ obs_text .pattern } )" )
90+ tchar = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]" )
91+ token = re .compile (rf"{ tchar .pattern } +" )
92+ field_name = token
93+ method = token
94+
95+ # RFC 9112 (HTTP/1.1)
96+ HTTP_version = re .compile (r"HTTP/[0-9]\.[0-9]" )
97+ reason_phrase = re .compile (rf"(?:[\t ]|{ VCHAR .pattern } |{ obs_text .pattern } )+" )
98+ # request_target delegates to the URI RFC 3986, which is complex and may be
99+ # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
100+ # characters). Instead, we allow everything but control chars and whitespace.
101+ request_target = re .compile (rf"{ field_vchar .pattern } +" )
102+ request_line = re .compile (
103+ rf"({ method .pattern } ) ({ request_target .pattern } ) ({ HTTP_version .pattern } )"
104+ )
105+ status_code = re .compile (r"[0-9]{3}" )
106+ status_line = re .compile (
107+ rf"({ HTTP_version .pattern } ) ({ status_code .pattern } ) ({ reason_phrase .pattern } )?"
108+ )
75109
76110
77111@lru_cache (1000 )
@@ -145,7 +179,7 @@ def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
145179
146180 def add (self , name : str , value : str ) -> None :
147181 """Adds a new value for the given key."""
148- if not HTTP_TOKEN_RE . match (name ):
182+ if not _ABNF . field_name . fullmatch (name ):
149183 raise HTTPInputError ("Invalid header name %r" % name )
150184 norm_name = _normalize_header (name )
151185 self ._last_key = norm_name
@@ -892,9 +926,6 @@ class RequestStartLine(typing.NamedTuple):
892926 version : str
893927
894928
895- _http_version_re = re .compile (r"^HTTP/1\.[0-9]$" )
896-
897-
898929def parse_request_start_line (line : str ) -> RequestStartLine :
899930 """Returns a (method, path, version) tuple for an HTTP 1.x request line.
900931
@@ -903,17 +934,18 @@ def parse_request_start_line(line: str) -> RequestStartLine:
903934 >>> parse_request_start_line("GET /foo HTTP/1.1")
904935 RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
905936 """
906- try :
907- method , path , version = line .split (" " )
908- except ValueError :
937+ match = _ABNF .request_line .fullmatch (line )
938+ if not match :
909939 # https://tools.ietf.org/html/rfc7230#section-3.1.1
910940 # invalid request-line SHOULD respond with a 400 (Bad Request)
911941 raise HTTPInputError ("Malformed HTTP request line" )
912- if not _http_version_re .match (version ):
913- raise HTTPInputError (
914- "Malformed HTTP version in HTTP Request-Line: %r" % version
915- )
916- return RequestStartLine (method , path , version )
942+ r = RequestStartLine (match .group (1 ), match .group (2 ), match .group (3 ))
943+ if not r .version .startswith ("HTTP/1" ):
944+ # HTTP/2 and above doesn't use parse_request_start_line.
945+ # This could be folded into the regex but we don't want to deviate
946+ # from the ABNF in the RFCs.
947+ raise HTTPInputError ("Unexpected HTTP version %r" % r .version )
948+ return r
917949
918950
919951class ResponseStartLine (typing .NamedTuple ):
@@ -922,9 +954,6 @@ class ResponseStartLine(typing.NamedTuple):
922954 reason : str
923955
924956
925- _http_response_line_re = re .compile (r"(HTTP/1.[0-9]) ([0-9]+) ([^\r]*)" )
926-
927-
928957def parse_response_start_line (line : str ) -> ResponseStartLine :
929958 """Returns a (version, code, reason) tuple for an HTTP 1.x response line.
930959
@@ -933,11 +962,14 @@ def parse_response_start_line(line: str) -> ResponseStartLine:
933962 >>> parse_response_start_line("HTTP/1.1 200 OK")
934963 ResponseStartLine(version='HTTP/1.1', code=200, reason='OK')
935964 """
936- line = native_str (line )
937- match = _http_response_line_re .match (line )
965+ match = _ABNF .status_line .fullmatch (line )
938966 if not match :
939967 raise HTTPInputError ("Error parsing response start line" )
940- return ResponseStartLine (match .group (1 ), int (match .group (2 )), match .group (3 ))
968+ r = ResponseStartLine (match .group (1 ), int (match .group (2 )), match .group (3 ))
969+ if not r .version .startswith ("HTTP/1" ):
970+ # HTTP/2 and above doesn't use parse_response_start_line.
971+ raise HTTPInputError ("Unexpected HTTP version %r" % r .version )
972+ return r
941973
942974
943975# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
0 commit comments