Skip to content

Commit d33bc95

Browse files
committed
parser: Ignore CFWS in Message-ID header
We recently started stripping comments and folding white space from the In-Reply-To and References headers. Do so also for the Message-ID header. Because of the importance of the Message-ID header, we accept even non-compliant headers and because we now have a pattern for this, we also start (re-)accepting non-compliant In-Reply-To headers. Conflicts: patchwork/parser.py NOTE(stephenfin): Conflicts are due to the absence of commit 94d75a1 ("Blackify code") which we don't want to backport. Signed-off-by: Stephen Finucane <[email protected]> Related: #399
1 parent cd8f14f commit d33bc95

File tree

2 files changed

+104
-22
lines changed

2 files changed

+104
-22
lines changed

patchwork/parser.py

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -236,15 +236,15 @@ def _find_series_by_references(project, mail):
236236
name, prefixes = clean_subject(subject, [project.linkname])
237237
version = parse_version(name, prefixes)
238238

239-
refs = find_references(mail)
240-
h = clean_header(mail.get('Message-Id'))
241-
if h:
242-
refs = [h] + refs
239+
msg_id = find_message_id(mail)
240+
refs = [msg_id] + find_references(mail)
243241

244242
for ref in refs:
245243
try:
246244
series = SeriesReference.objects.get(
247-
msgid=ref[:255], project=project).series
245+
msgid=ref[:255],
246+
project=project,
247+
).series
248248

249249
if series.version != version:
250250
# if the versions don't match, at least make sure these were
@@ -473,6 +473,34 @@ def find_headers(mail):
473473
return '\n'.join(strings)
474474

475475

476+
def find_message_id(mail):
477+
"""Extract the 'message-id' headers from a given mail and validate it.
478+
479+
The validation here is simply checking that the Message-ID is correctly
480+
formatted per RFC-2822. However, even if it's not we'll attempt to use what
481+
we're given because a patch tracked in Patchwork with janky threading is
482+
better than no patch whatsoever.
483+
"""
484+
header = clean_header(mail.get('Message-Id'))
485+
if not header:
486+
raise ValueError("Broken 'Message-Id' header")
487+
488+
msgid = _msgid_re.search(header)
489+
if msgid:
490+
msgid = msgid.group(0)
491+
else:
492+
# This is only info level since the admin likely can't do anything
493+
# about this
494+
logger.info(
495+
"Malformed 'Message-Id' header. The 'msg-id' component should be "
496+
"surrounded by angle brackets. Saving raw header. This may "
497+
"include comments and extra whitespace."
498+
)
499+
msgid = header.strip()
500+
501+
return msgid[:255]
502+
503+
476504
def find_references(mail):
477505
"""Construct a list of possible reply message ids.
478506
@@ -482,19 +510,33 @@ def find_references(mail):
482510
"""
483511
refs = []
484512

485-
if 'In-Reply-To' in mail:
486-
for in_reply_to in mail.get_all('In-Reply-To'):
487-
ref = _msgid_re.search(clean_header(in_reply_to))
488-
if ref:
489-
refs.append(ref.group(0))
513+
for header in mail.get_all('In-Reply-To', []):
514+
header = clean_header(header)
515+
if not header:
516+
continue
517+
ref = _msgid_re.search(header)
518+
if ref:
519+
ref = ref.group(0)
520+
else:
521+
logger.info(
522+
"Malformed 'In-Reply-To' header. The 'msg-id' component "
523+
"should be surrounded by angle brackets. Saving raw header. "
524+
"This may include comments and extra whitespace."
525+
)
526+
ref = header.strip()
527+
refs.append(ref)
490528

491-
if 'References' in mail:
492-
for references_header in mail.get_all('References'):
493-
references = _msgid_re.findall(clean_header(references_header))
494-
references.reverse()
495-
for ref in references:
496-
if ref not in refs:
497-
refs.append(ref)
529+
for header in mail.get_all('References', []):
530+
header = clean_header(header)
531+
if not header:
532+
continue
533+
# NOTE: We can't really implement a fallback here since without angle
534+
# brackets there is no obvious way to delimit headers.
535+
references = _msgid_re.findall(header)
536+
references.reverse()
537+
for ref in references:
538+
if ref not in refs:
539+
refs.append(ref)
498540

499541
return refs
500542

@@ -1056,11 +1098,7 @@ def parse_mail(mail, list_id=None):
10561098

10571099
# parse metadata
10581100

1059-
msgid = clean_header(mail.get('Message-Id'))
1060-
if not msgid:
1061-
raise ValueError("Broken 'Message-Id' header")
1062-
msgid = msgid[:255]
1063-
1101+
msgid = find_message_id(mail)
10641102
subject = mail.get('Subject')
10651103
name, prefixes = clean_subject(subject, [project.linkname])
10661104
is_comment = subject_check(subject)

patchwork/tests/test_parser.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,6 +1208,37 @@ def test_duplicate_coverletter(self):
12081208
self.assertEqual(Cover.objects.count(), 1)
12091209

12101210

1211+
class TestFindMessageID(TestCase):
1212+
def test_find_message_id__missing_header(self):
1213+
email = create_email('test')
1214+
del email['Message-Id']
1215+
email['Message-Id'] = ''
1216+
1217+
with self.assertRaises(ValueError) as cm:
1218+
parser.find_message_id(email)
1219+
self.assertIn("Broken 'Message-Id' header", str(cm.exeception))
1220+
1221+
def test_find_message_id__header_with_comments(self):
1222+
"""Test that we strip comments from the Message-ID field."""
1223+
message_id = '<[email protected]> (message ID with a comment)'
1224+
email = create_email('test', msgid=message_id)
1225+
1226+
expected = '<[email protected]>'
1227+
actual = parser.find_message_id(email)
1228+
1229+
self.assertEqual(expected, actual)
1230+
1231+
def test_find_message_id__invalid_header_fallback(self):
1232+
"""Test that we accept badly formatted Message-ID fields."""
1233+
message_id = '[email protected]>'
1234+
email = create_email('test', msgid=message_id)
1235+
1236+
expected = '[email protected]>'
1237+
actual = parser.find_message_id(email)
1238+
1239+
self.assertEqual(expected, actual)
1240+
1241+
12111242
class TestFindReferences(TestCase):
12121243
def test_find_references__header_with_comments(self):
12131244
"""Test that we strip comments from References, In-Reply-To fields."""
@@ -1253,6 +1284,19 @@ def test_find_references__duplicate_references(self):
12531284

12541285
self.assertEqual(expected, actual)
12551286

1287+
def test_find_references__invalid_header_fallback(self):
1288+
"""Test that we accept badly formatted In-Reply-To fields."""
1289+
message_id = '<F35DEAC7BCE34641BA9FAC6BCA4A12E70A85B341@SHSMSX104.ccr.corp.intel.com>' # noqa: E501
1290+
in_reply_to = '[email protected]>'
1291+
email = create_email('test', msgid=message_id, in_reply_to=in_reply_to)
1292+
1293+
expected = [
1294+
1295+
]
1296+
actual = parser.find_references(email)
1297+
1298+
self.assertEqual(expected, actual)
1299+
12561300

12571301
class TestCommentCorrelation(TestCase):
12581302

0 commit comments

Comments
 (0)