Skip to content

Commit c075088

Browse files
gh-130631: Make join_header_words() more similar to the original Perl version
* Always quote strings with non-ASCII characters. * Allow some non-separator and non-control characters (like "." or "-") to be unquoted. * Always quote strings that end with "\n". * Use the fullmatch() method for clarity and optimization.
1 parent 64ccbbb commit c075088

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

Lib/http/cookiejar.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -430,25 +430,26 @@ def split_header_words(header_values):
430430
if pairs: result.append(pairs)
431431
return result
432432

433+
HEADER_JOIN_TOKEN_RE = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
433434
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
434435
def join_header_words(lists):
435436
"""Do the inverse (almost) of the conversion done by split_header_words.
436437
437438
Takes a list of lists of (key, value) pairs and produces a single header
438439
value. Attribute values are quoted if needed.
439440
440-
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
441-
'text/plain; charset="iso-8859-1"'
442-
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
443-
'text/plain, charset="iso-8859-1"'
441+
>>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
442+
'text/plain; charset="iso-8859/1"'
443+
>>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
444+
'text/plain, charset="iso-8859/1"'
444445
445446
"""
446447
headers = []
447448
for pairs in lists:
448449
attr = []
449450
for k, v in pairs:
450451
if v is not None:
451-
if not re.search(r"^\w+$", v):
452+
if not HEADER_JOIN_TOKEN_RE.fullmatch(v):
452453
v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \
453454
v = '"%s"' % v
454455
k = "%s=%s" % (k, v)

Lib/test/test_http_cookiejar.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,11 +276,21 @@ def test_roundtrip(self):
276276
("foo=bar;bar=baz", "foo=bar; bar=baz"),
277277
('foo bar baz', "foo; bar; baz"),
278278
(r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'),
279+
#("föo=bär", 'föo="bär"'),
279280
('foo,,,bar', 'foo, bar'),
280281
('foo=bar,bar=baz', 'foo=bar, bar=baz'),
282+
("foo=\n", 'foo=""'),
283+
('foo="\n"', 'foo="\n"'),
284+
('foo=bar\n', 'foo=bar'),
285+
('foo="bar\n"', 'foo="bar\n"'),
286+
('foo=bar\nbaz', 'foo=bar; baz'),
287+
('foo="bar\nbaz"', 'foo="bar\nbaz"'),
281288

282289
('text/html; charset=iso-8859-1',
283-
'text/html; charset="iso-8859-1"'),
290+
'text/html; charset=iso-8859-1'),
291+
292+
('text/html; charset="iso-8859/1"',
293+
'text/html; charset="iso-8859/1"'),
284294

285295
('foo="bar"; port="80,81"; discard, bar=baz',
286296
'foo=bar; port="80,81"; discard, bar=baz'),
@@ -541,7 +551,7 @@ def test_missing_value(self):
541551
self.assertIsNone(cookie.value)
542552
self.assertEqual(cookie.name, '"spam"')
543553
self.assertEqual(lwp_cookie_str(cookie), (
544-
r'"spam"; path="/foo/"; domain="www.acme.com"; '
554+
r'"spam"; path="/foo/"; domain=www.acme.com; '
545555
'path_spec; discard; version=0'))
546556
old_str = repr(c)
547557
c.save(ignore_expires=True, ignore_discard=True)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`http.cookiejar.join_header_words` is now more similar to the original
2+
Perl version. It now quotes the same set of characters and always quotes
3+
values that end with ``"\n"``.

0 commit comments

Comments
 (0)