Skip to content

Commit cd8f14f

Browse files
committed
parser: Ignore CFWS in In-Reply-To, References headers
RFC2822 states that [1] a comment or folding white space is permitted to be inserted before or after a msg-id in the Message-ID, In-Reply-To or References fields. Allow for this. [1] https://tools.ietf.org/html/rfc2822#section-3.6.4 Conflicts: patchwork/tests/test_parser.py NOTE(stephenfin): Conflicts are due to the absence of commits f5cd521 ("parser: Add 'X-Patchwork-Action-Required' header") and 94d75a1 ("Blackify code"), neither of which we want to backport. Signed-off-by: Stephen Finucane <[email protected]> Reviewed-by: DJ Delorie <[email protected]> Closes: #399
1 parent 215cede commit cd8f14f

File tree

3 files changed

+105
-17
lines changed

3 files changed

+105
-17
lines changed

patchwork/parser.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from patchwork.models import State
3232

3333

34+
_msgid_re = re.compile(r'<[^>]+>')
3435
_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
3536
_filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
3637
list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
@@ -483,19 +484,15 @@ def find_references(mail):
483484

484485
if 'In-Reply-To' in mail:
485486
for in_reply_to in mail.get_all('In-Reply-To'):
486-
r = clean_header(in_reply_to)
487-
if r:
488-
refs.append(r)
487+
ref = _msgid_re.search(clean_header(in_reply_to))
488+
if ref:
489+
refs.append(ref.group(0))
489490

490491
if 'References' in mail:
491492
for references_header in mail.get_all('References'):
492-
h = clean_header(references_header)
493-
if not h:
494-
continue
495-
references = h.split()
493+
references = _msgid_re.findall(clean_header(references_header))
496494
references.reverse()
497495
for ref in references:
498-
ref = ref.strip()
499496
if ref not in refs:
500497
refs.append(ref)
501498

patchwork/tests/test_parser.py

Lines changed: 89 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,22 +68,48 @@ def read_mail(filename, project=None):
6868
return mail
6969

7070

71-
def _create_email(msg, msgid=None, sender=None, listid=None, in_reply_to=None):
71+
def _create_email(
72+
msg,
73+
msgid=None,
74+
subject=None,
75+
sender=None,
76+
listid=None,
77+
in_reply_to=None,
78+
references=None,
79+
):
7280
msg['Message-Id'] = msgid or make_msgid()
7381
msg['Subject'] = 'Test subject'
7482
msg['From'] = sender or 'Test Author <[email protected]>'
7583
msg['List-Id'] = listid or 'test.example.com'
7684
if in_reply_to:
7785
msg['In-Reply-To'] = in_reply_to
7886

87+
if references:
88+
msg['References'] = references
89+
7990
return msg
8091

8192

82-
def create_email(content, msgid=None, sender=None, listid=None,
83-
in_reply_to=None):
93+
def create_email(
94+
content,
95+
msgid=None,
96+
subject=None,
97+
sender=None,
98+
listid=None,
99+
in_reply_to=None,
100+
references=None,
101+
):
84102
msg = MIMEText(content, _charset='us-ascii')
85103

86-
return _create_email(msg, msgid, sender, listid, in_reply_to)
104+
return _create_email(
105+
msg,
106+
msgid,
107+
subject,
108+
sender,
109+
listid,
110+
in_reply_to,
111+
references,
112+
)
87113

88114

89115
def parse_mail(*args, **kwargs):
@@ -1146,26 +1172,34 @@ def log_query_errors(execute, sql, params, many, context):
11461172

11471173
def test_duplicate_patch(self):
11481174
diff = read_patch('0001-add-line.patch')
1149-
m = create_email(diff, listid=self.listid, msgid='[email protected]')
1175+
m = create_email(diff, listid=self.listid, msgid='<[email protected]>')
11501176

11511177
self._test_duplicate_mail(m)
11521178

11531179
self.assertEqual(Patch.objects.count(), 1)
11541180

11551181
def test_duplicate_comment(self):
11561182
diff = read_patch('0001-add-line.patch')
1157-
m1 = create_email(diff, listid=self.listid, msgid='[email protected]')
1183+
m1 = create_email(
1184+
diff,
1185+
listid=self.listid,
1186+
msgid='<[email protected]>',
1187+
)
11581188
_parse_mail(m1)
11591189

1160-
m2 = create_email('test', listid=self.listid, msgid='[email protected]',
1161-
in_reply_to='[email protected]')
1190+
m2 = create_email(
1191+
'test',
1192+
listid=self.listid,
1193+
msgid='<[email protected]>',
1194+
in_reply_to='<[email protected]>',
1195+
)
11621196
self._test_duplicate_mail(m2)
11631197

11641198
self.assertEqual(Patch.objects.count(), 1)
11651199
self.assertEqual(PatchComment.objects.count(), 1)
11661200

11671201
def test_duplicate_coverletter(self):
1168-
m = create_email('test', listid=self.listid, msgid='[email protected]')
1202+
m = create_email('test', listid=self.listid, msgid='<[email protected]>')
11691203
del m['Subject']
11701204
m['Subject'] = '[PATCH 0/1] test cover letter'
11711205

@@ -1174,6 +1208,52 @@ def test_duplicate_coverletter(self):
11741208
self.assertEqual(Cover.objects.count(), 1)
11751209

11761210

1211+
class TestFindReferences(TestCase):
1212+
def test_find_references__header_with_comments(self):
1213+
"""Test that we strip comments from References, In-Reply-To fields."""
1214+
in_reply_to = (
1215+
'<[email protected]> (message from\n'
1216+
' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)'
1217+
)
1218+
email = create_email('test', in_reply_to=in_reply_to)
1219+
1220+
expected = ['<[email protected]>']
1221+
actual = parser.find_references(email)
1222+
1223+
self.assertEqual(expected, actual)
1224+
1225+
def test_find_references__duplicate_references(self):
1226+
"""Test that we ignore duplicate message IDs in 'References'."""
1227+
message_id = '<[email protected]>'
1228+
in_reply_to = (
1229+
1230+
)
1231+
references = (
1232+
'<[email protected]>\n' # noqa: E501
1233+
' <CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>\n' # noqa: E501
1234+
1235+
1236+
1237+
)
1238+
email = create_email(
1239+
'test',
1240+
msgid=message_id,
1241+
in_reply_to=in_reply_to,
1242+
references=references,
1243+
)
1244+
1245+
expected = [
1246+
1247+
1248+
1249+
'<CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q@mail.gmail.com>', # noqa: E501
1250+
1251+
]
1252+
actual = parser.find_references(email)
1253+
1254+
self.assertEqual(expected, actual)
1255+
1256+
11771257
class TestCommentCorrelation(TestCase):
11781258

11791259
def test_find_patch_for_comment__no_reply(self):
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
fixes:
3+
- |
4+
Comments and whitespace are now correctly stripped from the ``Message-ID``,
5+
``In-Reply-To``, and ``References`` headers. One side effect of this change
6+
is that the parser is now stricter with regard to the format of the
7+
``msg-id`` component of these headers: all identifiers must now be
8+
surrounded by angle brackets, e.g. ``<[email protected]>``. This is
9+
mandated in the spec and a review of mailing list archives suggests it is
10+
broadly adhered to. Without these markers, there is no way to delimit
11+
``msg-id`` from any surrounding comments and whitespace.

0 commit comments

Comments
 (0)