Skip to content

Commit 7546f7e

Browse files
committed
parser: Ignore CFWS in In-Reply-To, References headers
RFC2822 states that [1] a comment or folding white space is permitted to be inserted before or after a msg-id in the Message-ID, In-Reply-To or References fields. Allow for this. [1] https://tools.ietf.org/html/rfc2822#section-3.6.4 Signed-off-by: Stephen Finucane <[email protected]> Reviewed-by: DJ Delorie <[email protected]> Closes: #399
1 parent b90ba52 commit 7546f7e

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed

patchwork/parser.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from patchwork.models import State
3232

3333

34+
_msgid_re = re.compile(r'<[^>]+>')
3435
_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
3536
_filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
3637
list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
@@ -502,19 +503,15 @@ def find_references(mail):
502503

503504
if 'In-Reply-To' in mail:
504505
for in_reply_to in mail.get_all('In-Reply-To'):
505-
r = clean_header(in_reply_to)
506-
if r:
507-
refs.append(r)
506+
ref = _msgid_re.search(clean_header(in_reply_to))
507+
if ref:
508+
refs.append(ref.group(0))
508509

509510
if 'References' in mail:
510511
for references_header in mail.get_all('References'):
511-
h = clean_header(references_header)
512-
if not h:
513-
continue
514-
references = h.split()
512+
references = _msgid_re.findall(clean_header(references_header))
515513
references.reverse()
516514
for ref in references:
517-
ref = ref.strip()
518515
if ref not in refs:
519516
refs.append(ref)
520517

patchwork/tests/test_parser.py

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ def _create_email(
7474
sender=None,
7575
listid=None,
7676
in_reply_to=None,
77+
references=None,
7778
headers=None,
7879
):
7980
msg['Message-Id'] = msgid or make_msgid()
@@ -84,6 +85,9 @@ def _create_email(
8485
if in_reply_to:
8586
msg['In-Reply-To'] = in_reply_to
8687

88+
if references:
89+
msg['References'] = references
90+
8791
for header in headers or {}:
8892
msg[header] = headers[header]
8993

@@ -97,12 +101,20 @@ def create_email(
97101
sender=None,
98102
listid=None,
99103
in_reply_to=None,
104+
references=None,
100105
headers=None,
101106
):
102107
msg = MIMEText(content, _charset='us-ascii')
103108

104109
return _create_email(
105-
msg, msgid, subject, sender, listid, in_reply_to, headers
110+
msg,
111+
msgid,
112+
subject,
113+
sender,
114+
listid,
115+
in_reply_to,
116+
references,
117+
headers,
106118
)
107119

108120

@@ -1290,30 +1302,34 @@ def log_query_errors(execute, sql, params, many, context):
12901302

12911303
def test_duplicate_patch(self):
12921304
diff = read_patch('0001-add-line.patch')
1293-
m = create_email(diff, listid=self.listid, msgid='[email protected]')
1305+
m = create_email(diff, listid=self.listid, msgid='<[email protected]>')
12941306

12951307
self._test_duplicate_mail(m)
12961308

12971309
self.assertEqual(Patch.objects.count(), 1)
12981310

12991311
def test_duplicate_comment(self):
13001312
diff = read_patch('0001-add-line.patch')
1301-
m1 = create_email(diff, listid=self.listid, msgid='[email protected]')
1313+
m1 = create_email(
1314+
diff,
1315+
listid=self.listid,
1316+
msgid='<[email protected]>',
1317+
)
13021318
_parse_mail(m1)
13031319

13041320
m2 = create_email(
13051321
'test',
13061322
listid=self.listid,
1307-
1308-
in_reply_to='[email protected]',
1323+
msgid='<[email protected]>',
1324+
in_reply_to='<[email protected]>',
13091325
)
13101326
self._test_duplicate_mail(m2)
13111327

13121328
self.assertEqual(Patch.objects.count(), 1)
13131329
self.assertEqual(PatchComment.objects.count(), 1)
13141330

13151331
def test_duplicate_coverletter(self):
1316-
m = create_email('test', listid=self.listid, msgid='[email protected]')
1332+
m = create_email('test', listid=self.listid, msgid='<[email protected]>')
13171333
del m['Subject']
13181334
m['Subject'] = '[PATCH 0/1] test cover letter'
13191335

@@ -1322,6 +1338,52 @@ def test_duplicate_coverletter(self):
13221338
self.assertEqual(Cover.objects.count(), 1)
13231339

13241340

1341+
class TestFindReferences(TestCase):
1342+
def test_find_references__header_with_comments(self):
1343+
"""Test that we strip comments from References, In-Reply-To fields."""
1344+
in_reply_to = (
1345+
'<[email protected]> (message from\n'
1346+
' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)'
1347+
)
1348+
email = create_email('test', in_reply_to=in_reply_to)
1349+
1350+
expected = ['<[email protected]>']
1351+
actual = parser.find_references(email)
1352+
1353+
self.assertEqual(expected, actual)
1354+
1355+
def test_find_references__duplicate_references(self):
1356+
"""Test that we ignore duplicate message IDs in 'References'."""
1357+
message_id = '<[email protected]>'
1358+
in_reply_to = (
1359+
1360+
)
1361+
references = (
1362+
'<[email protected]>\n' # noqa: E501
1363+
' <CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>\n' # noqa: E501
1364+
1365+
1366+
1367+
)
1368+
email = create_email(
1369+
'test',
1370+
msgid=message_id,
1371+
in_reply_to=in_reply_to,
1372+
references=references,
1373+
)
1374+
1375+
expected = [
1376+
1377+
1378+
1379+
'<CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>', # noqa: E501
1380+
1381+
]
1382+
actual = parser.find_references(email)
1383+
1384+
self.assertEqual(expected, actual)
1385+
1386+
13251387
class TestCommentCorrelation(TestCase):
13261388
def test_find_patch_for_comment__no_reply(self):
13271389
"""Test behavior for mails that don't match anything we have."""
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
fixes:
3+
- |
4+
Comments and whitespace are now correctly stripped from the ``Message-ID``,
5+
``In-Reply-To``, and ``References`` headers. One side effect of this change
6+
is that the parser is now stricter with regard to the format of the
7+
``msg-id`` component of these headers: all identifiers must now be
8+
surrounded by angle brackets, e.g. ``<[email protected]>``. This is
9+
mandated in the spec and a review of mailing list archives suggests it is
10+
broadly adhered to. Without these markers, there is no way to delimit
11+
``msg-id`` from any surrounding comments and whitespace.

0 commit comments

Comments
 (0)