@@ -303,6 +303,17 @@ def test_continuation(self):
303303 data = "Foo: bar\r \n \f asdf"
304304 self .assertRaises (HTTPInputError , HTTPHeaders .parse , data )
305305
def test_forbidden_ascii_characters(self):
    # Control characters and ASCII whitespace other than space, tab, and
    # CRLF are not allowed in headers. Every other single code point up to
    # and including 0xFF must round-trip unchanged inside the value.
    #
    # range(0x100) rather than range(0xFF): the upper bound is exclusive,
    # and 0xFF itself is part of the range being checked.
    for c in range(0x100):
        char = chr(c)
        data = f"Foo: bar{char}baz\r\n"
        # subTest reports which code point failed instead of stopping at
        # the first failure with no indication of the offending value.
        with self.subTest(c=hex(c)):
            if c == 0x09 or (c >= 0x20 and c != 0x7F):
                # Tab, space, printable ASCII and 0x80-0xFF are accepted.
                headers = HTTPHeaders.parse(data)
                self.assertEqual(headers["Foo"], f"bar{char}baz")
            else:
                # Remaining C0 controls (including bare CR/LF) and DEL.
                self.assertRaises(HTTPInputError, HTTPHeaders.parse, data)
316+
306317 def test_unicode_newlines (self ):
307318 # Ensure that only \r\n is recognized as a header separator, and not
308319 # the other newline-like unicode characters.
@@ -311,10 +322,13 @@ def test_unicode_newlines(self):
311322 # and cpython's unicodeobject.c (which defines the implementation
312323 # of unicode_type.splitlines(), and uses a different list than TR13).
313324 newlines = [
314- "\u001b " , # VERTICAL TAB
315- "\u001c " , # FILE SEPARATOR
316- "\u001d " , # GROUP SEPARATOR
317- "\u001e " , # RECORD SEPARATOR
325+ # The following ascii characters are sometimes treated as newline-like,
326+ # but they're disallowed in HTTP headers. This test covers unicode
327+ # characters that are permitted in headers (under the obs-text rule).
328+ # "\u000b", # VERTICAL TAB
329+ # "\u001c", # FILE SEPARATOR
330+ # "\u001d", # GROUP SEPARATOR
331+ # "\u001e", # RECORD SEPARATOR
318332 "\u0085 " , # NEXT LINE
319333 "\u2028 " , # LINE SEPARATOR
320334 "\u2029 " , # PARAGRAPH SEPARATOR
@@ -363,13 +377,16 @@ def test_unicode_whitespace(self):
363377 self .assertEqual (expected , list (headers .get_all ()))
364378
def test_optional_cr(self):
    # Bare CR (a CR not followed by LF) is not a valid line separator, so
    # input containing one must be rejected outright.
    with self.assertRaises(HTTPInputError):
        HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nCR: cr\rMore: more\r\n")

    # Both CRLF and LF should be accepted as separators. CR should not be
    # part of the data when followed by LF.
    headers = HTTPHeaders.parse("CRLF: crlf\r\nLF: lf\nMore: more\r\n")
    # Sorted so the comparison does not depend on get_all() ordering.
    self.assertEqual(
        sorted(headers.get_all()),
        [("Crlf", "crlf"), ("Lf", "lf"), ("More", "more")],
    )
374391
375392 def test_copy (self ):
0 commit comments