@@ -86,13 +86,22 @@ class _ABNF:
8686
8787 # RFC 9110 (HTTP Semantics)
8888 obs_text = re .compile (r"[\x80-\xFF]" )
89+ field_vchar = re .compile (rf"(?:{ VCHAR .pattern } |{ obs_text .pattern } )" )
8990 tchar = re .compile (r"[!#$%&'*+\-.^_`|~0-9A-Za-z]" )
9091 token = re .compile (rf"{ tchar .pattern } +" )
9192 field_name = token
93+ method = token
9294
9395 # RFC 9112 (HTTP/1.1)
9496 HTTP_version = re .compile (r"HTTP/[0-9]\.[0-9]" )
9597 reason_phrase = re .compile (rf"(?:[\t ]|{ VCHAR .pattern } |{ obs_text .pattern } )+" )
98+ # request_target delegates to the URI RFC 3986, which is complex and may be
99+ # too restrictive (for example, the WHATWG version of the URL spec allows non-ASCII
100+ # characters). Instead, we allow everything but control chars and whitespace.
101+ request_target = re .compile (rf"{ field_vchar .pattern } +" )
102+ request_line = re .compile (
103+ rf"({ method .pattern } ) ({ request_target .pattern } ) ({ HTTP_version .pattern } )"
104+ )
96105 status_code = re .compile (r"[0-9]{3}" )
97106 status_line = re .compile (
98107 rf"({ HTTP_version .pattern } ) ({ status_code .pattern } ) ({ reason_phrase .pattern } )?"
@@ -170,7 +179,7 @@ def __init__(self, *args: typing.Any, **kwargs: str) -> None: # noqa: F811
170179
171180 def add (self , name : str , value : str ) -> None :
172181 """Adds a new value for the given key."""
173- if not _ABNF .token .fullmatch (name ):
182+ if not _ABNF .field_name .fullmatch (name ):
174183 raise HTTPInputError ("Invalid header name %r" % name )
175184 norm_name = _normalize_header (name )
176185 self ._last_key = norm_name
@@ -925,22 +934,18 @@ def parse_request_start_line(line: str) -> RequestStartLine:
925934 >>> parse_request_start_line("GET /foo HTTP/1.1")
926935 RequestStartLine(method='GET', path='/foo', version='HTTP/1.1')
927936 """
928- try :
929- method , path , version = line .split (" " )
930- except ValueError :
937+ match = _ABNF .request_line .fullmatch (line )
938+ if not match :
931939 # https://tools.ietf.org/html/rfc7230#section-3.1.1
932940 # invalid request-line SHOULD respond with a 400 (Bad Request)
933941 raise HTTPInputError ("Malformed HTTP request line" )
934- if not _ABNF .HTTP_version .fullmatch (version ):
935- raise HTTPInputError (
936- "Malformed HTTP version in HTTP Request-Line: %r" % version
937- )
938- if not version .startswith ("HTTP/1" ):
942+ r = RequestStartLine (match .group (1 ), match .group (2 ), match .group (3 ))
943+ if not r .version .startswith ("HTTP/1" ):
939944 # HTTP/2 and above doesn't use parse_request_start_line.
940945 # This could be folded into the regex but we don't want to deviate
941946 # from the ABNF in the RFCs.
942- raise HTTPInputError ("Unexpected HTTP version %r" % version )
943- return RequestStartLine ( method , path , version )
947+ raise HTTPInputError ("Unexpected HTTP version %r" % r . version )
948+ return r
944949
945950
946951class ResponseStartLine (typing .NamedTuple ):
0 commit comments