# Matches "HTTP/<digit>.<digit>" and captures the two digits separately.
VERSION_RE = re.compile(r"HTTP/(\d)\.(\d)")
# Matches a single NUL, carriage return or line feed character.
RFC9110_5_5_INVALID_AND_DANGEROUS = re.compile(r"[\0\r\n]")

# Punctuation named in RFC 3986 section 2, concatenated group by group.
# Notably absent from this list (such characters must be pct-encoded):
#   \N{SPACE}
#   <> and {}
#   ` a.k.a \N{GRAVE ACCENT}
#   ^ a.k.a \N{CIRCUMFLEX ACCENT}
#   | a.k.a \N{VERTICAL LINE}
#   backslash a.k.a \N{REVERSE SOLIDUS}
RFC3986_2_URI_SPECIALS = "".join((
    ":/?#[]@",      # gen-delims
    "!$&'()*+,;=",  # sub-delims
    "-._~",         # for unreserved
    "%",            # for pct-encoded
))

# Characters outside the RFC 3986 list that the URI pattern below still
# admits.
GUNICORN_NONSTANDARD_URI_CHARACTERS = (
    # NOTE(review): this literal is a quotation mark followed by one space
    # character - confirm the space is intended.
    "\N{QUOTATION MARK} "
    # All latin-1 upper bits, i.e. code points 0xA0 through 0xFF. This covers
    # "\N{LATIN CAPITAL LETTER A WITH TILDE}" and "\N{NO-BREAK SPACE}", used
    # in tests/requests/valid/027.http (utf8 decoded as latin-1), and also
    # includes "\N{SOFT HYPHEN}".
    + bytes(range(0xA0, 0x100)).decode("latin-1")
)

# Every non-alphanumeric character the URI pattern allows.
GUNICORN_URI_SPECIALS = "".join(
    (RFC3986_2_URI_SPECIALS, GUNICORN_NONSTANDARD_URI_CHARACTERS)
)

# One or more characters drawn from the specials above or ASCII
# alphanumerics. The specials are passed through re.escape so that
# characters such as "]" and "-" are taken literally inside the class.
URI_CHARACTERS_RE = re.compile(
    "[" + re.escape(GUNICORN_URI_SPECIALS) + "0-9a-zA-Z]+"
)
3260
3361class Message :
3462 def __init__ (self , cfg , unreader , peer_addr ):
@@ -425,6 +453,7 @@ def parse_request_line(self, line_bytes):
425453 if self .cfg .casefold_http_method :
426454 self .method = self .method .upper ()
427455
456+ # https://datatracker.ietf.org/doc/html/rfc9112#section-3.2
428457 # URI
429458 self .uri = bits [1 ]
430459
@@ -438,6 +467,9 @@ def parse_request_line(self, line_bytes):
438467 # => manually reject one always invalid URI: empty
439468 if len (self .uri ) == 0 :
440469 raise InvalidRequestLine (bytes_to_str (line_bytes ))
470+ # => reject URI exceeding characters listed in RFC 3986
471+ if not URI_CHARACTERS_RE .fullmatch (self .uri ):
472+ raise InvalidRequestLine (bytes_to_str (line_bytes ))
441473
442474 try :
443475 parts = split_request_uri (self .uri )
0 commit comments