Skip to content

Commit ad99940

Browse files
committed
parser: Ignore CFWS in Message-ID header
We recently started stripping comments and folding white space from the In-Reply-To and References headers. Do so also for the Message-ID header. Because of the importance of the Message-ID header, we accept even non-compliant headers and because we now have a pattern for this, we also start (re-)accepting non-compliant In-Reply-To headers. Signed-off-by: Stephen Finucane <[email protected]> Related: #399
1 parent 7546f7e commit ad99940

File tree

2 files changed

+103
-22
lines changed

2 files changed

+103
-22
lines changed

patchwork/parser.py

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -246,15 +246,14 @@ def _find_series_by_references(project, mail):
246246
name, prefixes = clean_subject(subject, [project.linkname])
247247
version = parse_version(name, prefixes)
248248

249-
refs = find_references(mail)
250-
h = clean_header(mail.get('Message-Id'))
251-
if h:
252-
refs = [h] + refs
249+
msg_id = find_message_id(mail)
250+
refs = [msg_id] + find_references(mail)
253251

254252
for ref in refs:
255253
try:
256254
series = SeriesReference.objects.get(
257-
msgid=ref[:255], project=project
255+
msgid=ref[:255],
256+
project=project,
258257
).series
259258

260259
if series.version != version:
@@ -492,6 +491,34 @@ def find_headers(mail):
492491
return '\n'.join(strings)
493492

494493

494+
def find_message_id(mail):
495+
"""Extract the 'message-id' headers from a given mail and validate it.
496+
497+
The validation here is simply checking that the Message-ID is correctly
498+
formatted per RFC-2822. However, even if it's not we'll attempt to use what
499+
we're given because a patch tracked in Patchwork with janky threading is
500+
better than no patch whatsoever.
501+
"""
502+
header = clean_header(mail.get('Message-Id'))
503+
if not header:
504+
raise ValueError("Broken 'Message-Id' header")
505+
506+
msgid = _msgid_re.search(header)
507+
if msgid:
508+
msgid = msgid.group(0)
509+
else:
510+
# This is only info level since the admin likely can't do anything
511+
# about this
512+
logger.info(
513+
"Malformed 'Message-Id' header. The 'msg-id' component should be "
514+
"surrounded by angle brackets. Saving raw header. This may "
515+
"include comments and extra whitespace."
516+
)
517+
msgid = header.strip()
518+
519+
return msgid[:255]
520+
521+
495522
def find_references(mail):
496523
"""Construct a list of possible reply message ids.
497524
@@ -501,19 +528,33 @@ def find_references(mail):
501528
"""
502529
refs = []
503530

504-
if 'In-Reply-To' in mail:
505-
for in_reply_to in mail.get_all('In-Reply-To'):
506-
ref = _msgid_re.search(clean_header(in_reply_to))
507-
if ref:
508-
refs.append(ref.group(0))
531+
for header in mail.get_all('In-Reply-To', []):
532+
header = clean_header(header)
533+
if not header:
534+
continue
535+
ref = _msgid_re.search(header)
536+
if ref:
537+
ref = ref.group(0)
538+
else:
539+
logger.info(
540+
"Malformed 'In-Reply-To' header. The 'msg-id' component "
541+
"should be surrounded by angle brackets. Saving raw header. "
542+
"This may include comments and extra whitespace."
543+
)
544+
ref = header.strip()
545+
refs.append(ref)
509546

510-
if 'References' in mail:
511-
for references_header in mail.get_all('References'):
512-
references = _msgid_re.findall(clean_header(references_header))
513-
references.reverse()
514-
for ref in references:
515-
if ref not in refs:
516-
refs.append(ref)
547+
for header in mail.get_all('References', []):
548+
header = clean_header(header)
549+
if not header:
550+
continue
551+
# NOTE: We can't really implement a fallback here since without angle
552+
# brackets there is no obvious way to delimit headers.
553+
references = _msgid_re.findall(header)
554+
references.reverse()
555+
for ref in references:
556+
if ref not in refs:
557+
refs.append(ref)
517558

518559
return refs
519560

@@ -1085,11 +1126,7 @@ def parse_mail(mail, list_id=None):
10851126

10861127
# parse metadata
10871128

1088-
msgid = clean_header(mail.get('Message-Id'))
1089-
if not msgid:
1090-
raise ValueError("Broken 'Message-Id' header")
1091-
msgid = msgid[:255]
1092-
1129+
msgid = find_message_id(mail)
10931130
subject = mail.get('Subject')
10941131
name, prefixes = clean_subject(subject, [project.linkname])
10951132
is_comment = subject_check(subject)

patchwork/tests/test_parser.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,37 @@ def test_duplicate_coverletter(self):
13381338
self.assertEqual(Cover.objects.count(), 1)
13391339

13401340

1341+
class TestFindMessageID(TestCase):
1342+
def test_find_message_id__missing_header(self):
1343+
email = create_email('test')
1344+
del email['Message-Id']
1345+
email['Message-Id'] = ''
1346+
1347+
with self.assertRaises(ValueError) as cm:
1348+
parser.find_message_id(email)
1349+
self.assertIn("Broken 'Message-Id' header", str(cm.exeception))
1350+
1351+
def test_find_message_id__header_with_comments(self):
1352+
"""Test that we strip comments from the Message-ID field."""
1353+
message_id = '<[email protected]> (message ID with a comment)'
1354+
email = create_email('test', msgid=message_id)
1355+
1356+
expected = '<[email protected]>'
1357+
actual = parser.find_message_id(email)
1358+
1359+
self.assertEqual(expected, actual)
1360+
1361+
def test_find_message_id__invalid_header_fallback(self):
1362+
"""Test that we accept badly formatted Message-ID fields."""
1363+
message_id = '[email protected]>'
1364+
email = create_email('test', msgid=message_id)
1365+
1366+
expected = '[email protected]>'
1367+
actual = parser.find_message_id(email)
1368+
1369+
self.assertEqual(expected, actual)
1370+
1371+
13411372
class TestFindReferences(TestCase):
13421373
def test_find_references__header_with_comments(self):
13431374
"""Test that we strip comments from References, In-Reply-To fields."""
@@ -1383,6 +1414,19 @@ def test_find_references__duplicate_references(self):
13831414

13841415
self.assertEqual(expected, actual)
13851416

1417+
def test_find_references__invalid_header_fallback(self):
1418+
"""Test that we accept badly formatted In-Reply-To fields."""
1419+
message_id = '<F35DEAC7BCE34641BA9FAC6BCA4A12E70A85B341@SHSMSX104.ccr.corp.intel.com>' # noqa: E501
1420+
in_reply_to = '[email protected]>'
1421+
email = create_email('test', msgid=message_id, in_reply_to=in_reply_to)
1422+
1423+
expected = [
1424+
1425+
]
1426+
actual = parser.find_references(email)
1427+
1428+
self.assertEqual(expected, actual)
1429+
13861430

13871431
class TestCommentCorrelation(TestCase):
13881432
def test_find_patch_for_comment__no_reply(self):

0 commit comments

Comments
 (0)