python · RanKKI · Jan 8, 2025 · Jan 8, 2025 · Jan 8, 2025
@@ -863,6 +863,10 @@ class MessageID(MsgID):
     token_type = 'message-id'
 
 
+class MessageIDList(TokenList):
+    token_type = "message-id-list"
+
+
 class InvalidMessageID(MessageID):
     token_type = 'invalid-message-id'
 
@@ -2141,6 +2145,23 @@ def get_msg_id(value):
     return msg_id, value
 
 
+def get_invalid_msg_id(value, endchars):
+    """ Read everything up to one of the chars in endchars, return InvalidMessageID
+    and rest of the value
+
+    """
+    invalid_msg_id = InvalidMessageID()
+    while value and value[0] not in endchars:
+        if value[0] in PHRASE_ENDS:
+            invalid_msg_id.append(ValueTerminal(value[0],
+                                                 'misplaced-special'))
+            value = value[1:]
+        else:
+            token, value = get_phrase(value)
+            invalid_msg_id.append(token)
+    return invalid_msg_id, value
+
+
 def parse_message_id(value):
     """message-id      =   "Message-ID:" msg-id CRLF
     """
@@ -2161,6 +2182,37 @@ def parse_message_id(value):
 
     return message_id
 
+def parse_message_id_list(value):
+    """ in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
+        references      =   "References:" 1*msg-id CRLF
+    """
+
+    message_id_list = MessageIDList()
+
+    # ignore initial CFWS
+    if value and value[0] in CFWS_LEADER:
+        _, value = get_cfws(value)
+
+    # required at least one msg-id
+    if not value:
+        message_id_list.defects.append(errors.InvalidHeaderDefect(
+            "Empty message-id-list"
+        ))
+        return message_id_list
+
+    while value:
+        try:
+            token, value = get_msg_id(value)
+            message_id_list.append(MessageID([token]))
+        except errors.HeaderParseError:
+            token, value = get_invalid_msg_id(value, "<")
+            message_id_list.append(token)
+            message_id_list.defects.append(
+                errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(str(token))))
+
+
+    return message_id_list
+
 #
 # XXX: As I begin to add additional header parsers, I'm realizing we probably
 # have two level of parser routines: the get_XXX methods that get a token in

@@ -534,6 +534,18 @@ def parse(cls, value, kwds):
         kwds['defects'].extend(parse_tree.all_defects)
 
 
+class MessageIDListHeader:
+
+    max_count = None
+    value_parser = staticmethod(parser.parse_message_id_list)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+
+
 # The header factory #
 
 _default_header_map = {
@@ -557,6 +569,8 @@ def parse(cls, value, kwds):
     'content-disposition':          ContentDispositionHeader,
     'content-transfer-encoding':    ContentTransferEncodingHeader,
     'message-id':                   MessageIDHeader,
+    'references':                   MessageIDListHeader,
+    'in-reply-to':                  MessageIDListHeader,
     }
 
 class HeaderRegistry:

@@ -2789,6 +2789,96 @@ def test_get_msg_id_ws_only_local(self):
         )
         self.assertEqual(msg_id.token_type, 'msg-id')
 
+    def test_parse_message_id_list_with_one_id(self):
+        text = "<[email protected]>"
+        msg_id_list = self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [],
+        )
+        self.assertEqual(msg_id_list.token_type, 'message-id-list')
+
+    def test_parse_message_id_list(self):
+        text = "<[email protected]> <[email protected]> <[email protected]>"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [],
+        )
+
+    def test_parse_message_id_list_extra_white_spaces(self):
+        text = "<[email protected]> <[email protected]>    <[email protected]>"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            "<[email protected]> <[email protected]> <[email protected]>",
+            [],
+        )
+
+    def test_parse_message_id_list_with_invalid_msg_id(self):
+        text = "<[email protected]> <[email protected]> abc <[email protected]>"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [errors.InvalidHeaderDefect], # "Invalid msg-id: 'abc '"
+        )
+
+    def test_parse_message_id_list_endswith_invalid_msg_id(self):
+        text = "<[email protected]> <[email protected]> abc"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [errors.InvalidHeaderDefect], # "Invalid msg-id: 'abc '"
+        )
+
+    def test_parse_message_id_list_with_no_value(self):
+        text = ""
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [errors.InvalidHeaderDefect], # "Empty message-id-list"
+        )
+
+    def test_parse_message_id_list_with_invalid_id_only(self):
+        text = "abc"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [errors.InvalidHeaderDefect], # "Invalid msg-id: 'abc '"
+        )
+
+    def test_parse_message_id_list_startswith_invalid_id(self):
+        text = "abc <[email protected]> <[email protected]> abc"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text,
+            text,
+            [errors.InvalidHeaderDefect, errors.InvalidHeaderDefect], # "Invalid msg-id: 'abc '"
+        )
+
+    def test_parse_message_id_list_with_leading_whitespace(self):
+        text = "    <[email protected]> <[email protected]>"
+        self._test_parse_x(
+            parser.parse_message_id_list,
+            text,
+            text.strip(),
+            text.strip(),
+            [],
+        )
 
 
 @parameterize

@@ -1031,6 +1031,81 @@ def test_get_body_malformed(self):
         # AttributeError: 'str' object has no attribute 'is_attachment'
         m.get_body()
 
+    def test_long_references_header(self):
+        msg = textwrap.dedent("""\
+            Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
+            References: <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>
+            From: Foo Bar <[email protected]>
+
+            No content
+            """)
+        m = self._str_msg(msg)
+        msg_bytes = (b'Message-ID:'
+                     b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
+                     b'References:'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b'From: Foo Bar <[email protected]>\n\nNo content\n')
+        self.assertEqual(m.as_bytes(), msg_bytes)
+
+    def test_long_in_reply_to_header(self):
+        msg = textwrap.dedent("""\
+            Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
+            In-Reply-To: <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>
+            From: Foo Bar <[email protected]>
+
+            No content
+            """)
+        m = self._str_msg(msg)
+        msg_bytes = (b'Message-ID:'
+                     b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
+                     b'In-Reply-To:'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b'From: Foo Bar <[email protected]>\n\nNo content\n')
+        self.assertEqual(m.as_bytes(), msg_bytes)
+
+    def test_msg_id_list_in_header(self):
+        msg_ids = " ".join(["<reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>"] * 5)
+        msg = textwrap.dedent(f"""\
+            Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
+            In-Reply-To: {msg_ids}
+            References: {msg_ids}
+            From: Foo Bar <[email protected]>
+
+            No content
+            """)
+        m = self._str_msg(msg)
+        msg_bytes = (b'Message-ID:'
+                     b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
+                     b'In-Reply-To:'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b'References:'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
+                     b'From: Foo Bar <[email protected]>\n\nNo content\n')
+        self.assertEqual(m.as_bytes(), msg_bytes)
+
+    def test_no_references_value(self):
+        msg = textwrap.dedent("""\
+            Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
+            References:
+            From: Foo Bar <[email protected]>
+
+            No content
+            """)
+        m = self._str_msg(msg)
+        msg_bytes = (b'Message-ID:'
+                     b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
+                     b'References: \n'
+                     b'From: Foo Bar <[email protected]>\n\nNo content\n')
+        self.assertEqual(m.as_bytes(), msg_bytes)
+
 
 class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
     # Doing the full test run here may seem a bit redundant, since the two

diff --git a/Misc/NEWS.d/next/Library/2025-01-08-13-16-37.gh-issue-100911.IzrEkV.rst b/Misc/NEWS.d/next/Library/2025-01-08-13-16-37.gh-issue-100911.IzrEkV.rst
@@ -0,0 +1,2 @@
+Fixed email headers ``References`` and ``In-Reply-To`` being treated as
+unstructured.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Fixed email headers ``References`` and ``In-Reply-To`` being treated as
		unstructured.