# Matches the protocol version token of a request line, capturing the major
# and minor digits separately.
VERSION_RE = re.compile(r"HTTP/(\d)\.(\d)")
# Matches NUL, CR or LF (the characters RFC 9110 section 5.5 singles out as
# invalid and dangerous).
RFC9110_5_5_INVALID_AND_DANGEROUS = re.compile(r"[\0\r\n]")

# Punctuation that RFC 3986 section 2 permits in a URI. ASCII letters and
# digits are not listed here; they are added as ranges when the pattern
# below is built.
RFC3986_2_URI_SPECIALS = (
    # gen-delims
    ":/?#[]@"
    # sub-delims
    "!$&'()*+,;="
    # for unreserved
    "-._~"
    # for pct-encoded
    "%"
    # notably absent from this list (must be pct-encoded):
    # \N{SPACE}
    # <> and {}
    # ` a.k.a \N{GRAVE ACCENT}
    # ^ a.k.a \N{CIRCUMFLEX ACCENT}
    # | a.k.a \N{VERTICAL LINE}
    # backslash a.k.a \N{REVERSE SOLIDUS}
)

# Characters outside RFC 3986 that are nevertheless accepted.
# NOTE(review): the first literal contains a SPACE after the quotation mark;
# confirm that accepting SPACE here is intentional.
GUNICORN_NONSTANDARD_URI_CHARACTERS = (
    "\N{QUOTATION MARK} "
    # used in tests/requests/valid/027.http (utf8 decoded as latin-1)
    # "\N{LATIN CAPITAL LETTER A WITH TILDE}"
    # "\N{NO-BREAK SPACE}"
    # includes the above - all latin-1 upper bits
    # also includes "\N{SOFT HYPHEN}"
    + bytes(range(0xA0, 0xff + 1)).decode("latin-1")
)

# Every non-alphanumeric character tolerated in a request URI.
GUNICORN_URI_SPECIALS = (
    RFC3986_2_URI_SPECIALS + GUNICORN_NONSTANDARD_URI_CHARACTERS
)

# One or more accepted URI characters; intended for use with ``fullmatch``.
# The specials are passed through re.escape so that "]", "-" and similar
# characters stay literal inside the character class. The explicit 1-tuple
# keeps the %-formatting unambiguous.
URI_CHARACTERS_RE = re.compile(
    r"[%s0-9a-zA-Z]+" % (re.escape(GUNICORN_URI_SPECIALS),)
)
60+
3261
3362class Message :
3463 def __init__ (self , cfg , unreader , peer_addr ):
@@ -425,6 +454,7 @@ def parse_request_line(self, line_bytes):
425454 if self .cfg .casefold_http_method :
426455 self .method = self .method .upper ()
427456
457+ # https://datatracker.ietf.org/doc/html/rfc9112#section-3.2
428458 # URI
429459 self .uri = bits [1 ]
430460
@@ -438,6 +468,9 @@ def parse_request_line(self, line_bytes):
438468 # => manually reject one always invalid URI: empty
439469 if len (self .uri ) == 0 :
440470 raise InvalidRequestLine (bytes_to_str (line_bytes ))
471+ # => reject any URI containing a character outside the accepted set (RFC 3986 characters plus gunicorn's nonstandard extras)
472+ if not URI_CHARACTERS_RE .fullmatch (self .uri ):
473+ raise InvalidRequestLine (bytes_to_str (line_bytes ))
441474
442475 try :
443476 parts = split_request_uri (self .uri )
0 commit comments