Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -863,6 +863,10 @@ class MessageID(MsgID):
token_type = 'message-id'


class MessageIDList(TokenList):
    """Token list holding the msg-id tokens of one header value."""

    token_type = 'message-id-list'


class InvalidMessageID(MessageID):
    """MessageID variant used for text that is not a valid msg-id."""

    token_type = 'invalid-message-id'

Expand Down Expand Up @@ -2141,6 +2145,23 @@ def get_msg_id(value):
return msg_id, value


def get_invalid_msg_id(value, endchars):
    """Collect unparsable text into an InvalidMessageID token.

    Consume value until it is exhausted or its next character is one of
    endchars.  A character found in PHRASE_ENDS is stored on its own as a
    'misplaced-special' terminal; anything else is handed to get_phrase().

    Return a (InvalidMessageID, remaining value) tuple.
    """
    garbage = InvalidMessageID()
    while value:
        head = value[0]
        if head in endchars:
            break
        if head in PHRASE_ENDS:
            garbage.append(ValueTerminal(head, 'misplaced-special'))
            value = value[1:]
            continue
        phrase, value = get_phrase(value)
        garbage.append(phrase)
    return garbage, value


def parse_message_id(value):
"""message-id = "Message-ID:" msg-id CRLF
"""
Expand All @@ -2161,6 +2182,37 @@ def parse_message_id(value):

return message_id

def parse_message_id_list(value):
    """Parse the value of an In-Reply-To or References header.

        in-reply-to = "In-Reply-To:" 1*msg-id CRLF
        references  = "References:" 1*msg-id CRLF

    Return a MessageIDList.  Every msg-id accepted by get_msg_id() is
    appended wrapped in a MessageID token.  Text rejected by get_msg_id()
    is appended as an InvalidMessageID token and recorded as an
    InvalidHeaderDefect instead of raising.  A value that is empty once
    leading CFWS has been discarded is also recorded as an
    InvalidHeaderDefect.
    """
    message_id_list = MessageIDList()

    # Leading CFWS is parsed but deliberately not kept in the result.
    if value and value[0] in CFWS_LEADER:
        _, value = get_cfws(value)

    # The grammar requires at least one msg-id.
    if not value:
        message_id_list.defects.append(errors.InvalidHeaderDefect(
            "Empty message-id-list"
        ))
        return message_id_list

    while value:
        try:
            token, value = get_msg_id(value)
        except errors.HeaderParseError:
            # If get_msg_id() rejected text that itself starts with "<",
            # get_invalid_msg_id() would stop on that "<" straight away
            # and consume nothing, so this loop would never terminate.
            # Take the stray "<" here to guarantee progress.
            stray_open = value[0] == "<"
            if stray_open:
                value = value[1:]
            token, value = get_invalid_msg_id(value, "<")
            if stray_open:
                # Keep the consumed "<" in the token so str() round-trips.
                token.insert(0, ValueTerminal("<", 'misplaced-special'))
            message_id_list.append(token)
            message_id_list.defects.append(
                errors.InvalidHeaderDefect(
                    "Invalid msg-id: {!r}".format(str(token))))
        else:
            message_id_list.append(MessageID([token]))

    return message_id_list

#
# XXX: As I begin to add additional header parsers, I'm realizing we probably
# have two level of parser routines: the get_XXX methods that get a token in
Expand Down
14 changes: 14 additions & 0 deletions Lib/email/headerregistry.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,18 @@ def parse(cls, value, kwds):
kwds['defects'].extend(parse_tree.all_defects)


class MessageIDListHeader:
    """Header mix-in whose value is parsed by parse_message_id_list()."""

    max_count = None
    value_parser = staticmethod(parser.parse_message_id_list)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and store the results in the kwds mapping.

        Sets kwds['parse_tree'] and kwds['decoded'], and extends
        kwds['defects'] with every defect found in the parse tree.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)


# The header factory #

_default_header_map = {
Expand All @@ -557,6 +569,8 @@ def parse(cls, value, kwds):
'content-disposition': ContentDispositionHeader,
'content-transfer-encoding': ContentTransferEncodingHeader,
'message-id': MessageIDHeader,
'references': MessageIDListHeader,
'in-reply-to': MessageIDListHeader,
}

class HeaderRegistry:
Expand Down
90 changes: 90 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2789,6 +2789,96 @@ def test_get_msg_id_ws_only_local(self):
)
self.assertEqual(msg_id.token_type, 'msg-id')

def test_parse_message_id_list_with_one_id(self):
    # A single id: the same text is used as input and as both expected
    # renderings, no defects are expected, and the token type is checked.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-id -- confirm upstream.
    source = "<[email protected]>"
    parsed = self._test_parse_x(
        parser.parse_message_id_list, source, source, source, [])
    self.assertEqual(parsed.token_type, 'message-id-list')

def test_parse_message_id_list(self):
    # Three ids separated by single spaces; no defects are expected.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-ids -- confirm upstream.
    source = "<[email protected]> <[email protected]> <[email protected]>"
    self._test_parse_x(
        parser.parse_message_id_list, source, source, source, [])

def test_parse_message_id_list_extra_white_spaces(self):
    # NOTE(review): the input and the expected value are identical here;
    # the runs of extra spaces that the test name refers to appear to have
    # been collapsed -- confirm against upstream.  "[email protected]" also
    # looks like scrape-time address obfuscation.
    source = "<[email protected]> <[email protected]> <[email protected]>"
    expected_value = "<[email protected]> <[email protected]> <[email protected]>"
    self._test_parse_x(
        parser.parse_message_id_list, source, source, expected_value, [])

def test_parse_message_id_list_with_invalid_msg_id(self):
    # A bare word between ids is expected to yield one InvalidHeaderDefect.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-ids -- confirm upstream.
    source = "<[email protected]> <[email protected]> abc <[email protected]>"
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_parse_x(
        parser.parse_message_id_list,
        source, source, source, expected_defects)

def test_parse_message_id_list_endswith_invalid_msg_id(self):
    # A trailing bare word is expected to yield one InvalidHeaderDefect.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-ids -- confirm upstream.
    source = "<[email protected]> <[email protected]> abc"
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_parse_x(
        parser.parse_message_id_list,
        source, source, source, expected_defects)

def test_parse_message_id_list_with_no_value(self):
    # An empty value is expected to yield one InvalidHeaderDefect
    # ("Empty message-id-list").
    source = ""
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_parse_x(
        parser.parse_message_id_list,
        source, source, source, expected_defects)

def test_parse_message_id_list_with_invalid_id_only(self):
    # A value holding only a bare word is expected to yield one
    # InvalidHeaderDefect.
    source = "abc"
    expected_defects = [errors.InvalidHeaderDefect]
    self._test_parse_x(
        parser.parse_message_id_list,
        source, source, source, expected_defects)

def test_parse_message_id_list_startswith_invalid_id(self):
    # Bare words at both ends are expected to yield two
    # InvalidHeaderDefects, one per bare word.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-ids -- confirm upstream.
    source = "abc <[email protected]> <[email protected]> abc"
    expected_defects = [
        errors.InvalidHeaderDefect,
        errors.InvalidHeaderDefect,
    ]
    self._test_parse_x(
        parser.parse_message_id_list,
        source, source, source, expected_defects)

def test_parse_message_id_list_with_leading_whitespace(self):
    # The expected renderings are the input with surrounding whitespace
    # stripped; no defects are expected.
    # NOTE(review): "[email protected]" looks like scrape-time address
    # obfuscation rather than the original msg-ids -- confirm upstream.
    source = " <[email protected]> <[email protected]>"
    stripped = source.strip()
    self._test_parse_x(
        parser.parse_message_id_list, source, stripped, stripped, [])


@parameterize
Expand Down
75 changes: 75 additions & 0 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,81 @@ def test_get_body_malformed(self):
# AttributeError: 'str' object has no attribute 'is_attachment'
m.get_body()

def test_long_references_header(self):
    # A long msg-id in References must appear verbatim in the serialized
    # bytes.
    source = textwrap.dedent("""\
Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
References: <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>
From: Foo Bar <[email protected]>

No content
""")
    parsed = self._str_msg(source)
    expected = (
        b'Message-ID:'
        b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
        b'References:'
        b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
        b'From: Foo Bar <[email protected]>\n\nNo content\n'
    )
    self.assertEqual(parsed.as_bytes(), expected)

def test_long_in_reply_to_header(self):
    # A long msg-id in In-Reply-To must appear verbatim in the serialized
    # bytes.
    source = textwrap.dedent("""\
Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
In-Reply-To: <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>
From: Foo Bar <[email protected]>

No content
""")
    parsed = self._str_msg(source)
    expected = (
        b'Message-ID:'
        b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
        b'In-Reply-To:'
        b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
        b'From: Foo Bar <[email protected]>\n\nNo content\n'
    )
    self.assertEqual(parsed.as_bytes(), expected)

def test_msg_id_list_in_header(self):
    # Five long ids in each of In-Reply-To and References: the expected
    # bytes put every id on its own line, each line starting with a space.
    one_id = "<reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>"
    msg_ids = " ".join([one_id] * 5)
    source = textwrap.dedent(f"""\
Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
In-Reply-To: {msg_ids}
References: {msg_ids}
From: Foo Bar <[email protected]>

No content
""")
    parsed = self._str_msg(source)
    # One expected output line per id; both headers repeat it five times.
    id_line = b' <reference-to-long-but-perfectly-valid-message-id-that-gets-qp-encoded@example.com>\n'
    expected = (
        b'Message-ID:'
        b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
        b'In-Reply-To:' + id_line * 5 +
        b'References:' + id_line * 5 +
        b'From: Foo Bar <[email protected]>\n\nNo content\n'
    )
    self.assertEqual(parsed.as_bytes(), expected)

def test_no_references_value(self):
    # A References header with no value is expected to serialize as
    # b'References: \n'.
    source = textwrap.dedent("""\
Message-ID: <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>
References:
From: Foo Bar <[email protected]>

No content
""")
    parsed = self._str_msg(source)
    expected = (
        b'Message-ID:'
        b' <long-but-perfectly-valid-message-id-that-does-not-end-up-qp-encoded@example.com>\n'
        b'References: \n'
        b'From: Foo Bar <[email protected]>\n\nNo content\n'
    )
    self.assertEqual(parsed.as_bytes(), expected)


class TestMIMEPart(TestEmailMessageBase, TestEmailBase):
# Doing the full test run here may seem a bit redundant, since the two
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixed email headers ``References`` and ``In-Reply-To`` being treated as
unstructured.
Loading