From c02bc473f19d2c0b0f4c5a68e0f788dbf93f14be Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 24 Oct 2024 15:52:30 +1100 Subject: [PATCH 01/14] fix: redundant WSP when parsed email --- Lib/email/policy.py | 28 ++++++++++++++++++++---- Lib/test/test_email/test_message.py | 33 +++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 6 deletions(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 46b7de5bb6d8ae..503faf16ba086e 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -4,11 +4,11 @@ import re import sys -from email._policybase import Policy, Compat32, compat32, _extend_docstrings -from email.utils import _has_surrogates -from email.headerregistry import HeaderRegistry as HeaderRegistry +from email._policybase import Compat32, Policy, _extend_docstrings, compat32 from email.contentmanager import raw_data_manager +from email.headerregistry import HeaderRegistry as HeaderRegistry from email.message import EmailMessage +from email.utils import _has_surrogates __all__ = [ 'Compat32', @@ -125,7 +125,27 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + + # Fixed: https://github.com/python/cpython/issues/124452 + # + # Root cause: The function '_refold_parse_tree' in '_header_value_parse.py'. + # If there is no WSP, it can't figure out how to wrap the text. + # Therefore, it places the entire value directly after '\n', and because + # there is a WSP after ':', the WSP will be moved to the front + # of the value according to RFC5322, section 2.2.3. + # + # However, the WSP is not part of the value; therefore, we must + # remove it. + + # Remove leading WSP in the first line only if there no value in the + # first line, and has values after that + remove_wsp = not value.strip() and len(sourcelines) > 1 + + value = value.lstrip(' \t') + if remove_wsp and sourcelines[1][0] in ' \t': + sourcelines[1] = sourcelines[1][1:] + + value += ''.join(sourcelines[1:]) return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 034f7626c1fc7c..4f7ab4ebdde688 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -1,6 +1,6 @@ -import unittest import textwrap -from email import policy, message_from_string +import unittest +from email import message_from_bytes, message_from_string, policy from email.message import EmailMessage, MIMEPart from test.test_email import TestEmailBase, parameterize @@ -957,6 +957,35 @@ def test_folding_with_utf8_encoding_8(self): b'123456789 123456789 123456789 123456789 ' b'123456789-123456789\n 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') + + + def test_folding_with_short_nospace_1(self): + # bpo-36520 + # + # Fold a line that contains a long whitespace after + # the fold point. + + m = EmailMessage(policy.default) + m['Message-ID'] = '12345678912345678123456789123456789123456789' + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_1(self): + # Fixed: https://github.com/python/cpython/issues/124452 + # + # When the value is too long, it should be converted back + # to its original form without any modifications. + + m = EmailMessage(policy.default) + m['Message-ID'] = '12345678912345678123456789123456789123456789'\ + '12345678912345678123456789123456789123456789' + self.assertEqual(m.as_bytes(), + b'Message-ID:\n 12345678912345678123456789123456'\ + b'78912345678912345678912345678123456789123456789'\ + b'123456789\n\n') + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_get_body_malformed(self): """test for bpo-42892""" From 6f64300004ea6f8907db9d11c6504075f74f3e5e Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 24 Oct 2024 21:29:33 +1100 Subject: [PATCH 02/14] fix: redundant \n and WSP when using policy.compat32 --- Lib/email/_policybase.py | 24 +++++++++++++++++++++++- Lib/email/policy.py | 21 ++++----------------- Lib/test/test_email/test_message.py | 14 +++++++++++++- 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index c7694a44e26639..2a8452f352855a 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -307,7 +307,29 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) - value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + + # Fixed: https://github.com/python/cpython/issues/124452 + # + # Root cause: The function '_refold_parse_tree' in '_header_value_parse.py'. + # If there is no WSP, it can't figure out how to wrap the text. + # Therefore, it places the entire value directly after '\n', and because + # there is a WSP after ':', the WSP will be moved to the front + # of the value according to RFC5322, section 2.2.3. + # + # However, the WSP is not part of the value; therefore, we must + # remove it. + + no_first_value = value.strip() == '' and len(sourcelines) > 1 + + # When using the compat32 policy, the value is '\n'. Therefore, + # use an empty string if there is no value (without WSP and CRLF) + # on the first line + value = '' if no_first_value else value.lstrip(' \t') + + if no_first_value and sourcelines[1][0] in ' \t': + sourcelines[1] = sourcelines[1][1:] + + value += ''.join(sourcelines[1:]) return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 503faf16ba086e..a15a8cb557aedb 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -125,24 +125,11 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) + no_first_value = value.strip() == '' and len(sourcelines) > 1 - # Fixed: https://github.com/python/cpython/issues/124452 - # - # Root cause: The function '_refold_parse_tree' in '_header_value_parse.py'. - # If there is no WSP, it can't figure out how to wrap the text. - # Therefore, it places the entire value directly after '\n', and because - # there is a WSP after ':', the WSP will be moved to the front - # of the value according to RFC5322, section 2.2.3. - # - # However, the WSP is not part of the value; therefore, we must - # remove it. - - # Remove leading WSP in the first line only if there no value in the - # first line, and has values after that - remove_wsp = not value.strip() and len(sourcelines) > 1 - - value = value.lstrip(' \t') - if remove_wsp and sourcelines[1][0] in ' \t': + value = '' if no_first_value else value.lstrip(' \t') + + if no_first_value and sourcelines[1][0] in ' \t': sourcelines[1] = sourcelines[1][1:] value += ''.join(sourcelines[1:]) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 4f7ab4ebdde688..4d74f08add3358 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -970,7 +970,7 @@ def test_folding_with_short_nospace_1(self): parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) - def test_folding_with_long_nospace_1(self): + def test_folding_with_long_nospace_default_policy_1(self): # Fixed: https://github.com/python/cpython/issues/124452 # # When the value is too long, it should be converted back @@ -986,6 +986,18 @@ def test_folding_with_long_nospace_1(self): parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_folding_with_long_nospace_compat32_policy_1(self): + # Fixed: https://github.com/python/cpython/issues/124452 + # + # When the value is too long, it should be converted back + # to its original form without any modifications. + + m = EmailMessage(policy.compat32) + m['Message-ID'] = '12345678912345678123456789123456789123456789'\ + '12345678912345678123456789123456789123456789' + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_get_body_malformed(self): """test for bpo-42892""" From a436bd564a935b29440dc6620e861b02cedc50bc Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:49:48 +0000 Subject: [PATCH 03/14] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst new file mode 100644 index 00000000000000..eac6224a9484ed --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -0,0 +1,3 @@ +Fix the mismatched email headers after converting them to bytes and parsing them into an email message. +This issue only occurs with long text without any spaces. +Root cause: The email library doesn't know how to wrap long text without spaces, but it wraps it anyway with a leading WSP, which is not removed when converting bytes to EmailMessage. From e9e55f25e15cb18c93153b243243247db21739fb Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 24 Oct 2024 21:54:36 +1100 Subject: [PATCH 04/14] fix: lint --- Lib/email/_policybase.py | 2 +- Lib/test/test_email/test_message.py | 17 ++++++++--------- ...24-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 2a8452f352855a..88f268b4868fdd 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -316,7 +316,7 @@ def header_source_parse(self, sourcelines): # there is a WSP after ':', the WSP will be moved to the front # of the value according to RFC5322, section 2.2.3. # - # However, the WSP is not part of the value; therefore, we must + # However, the WSP is not part of the value; therefore, we must # remove it. no_first_value = value.strip() == '' and len(sourcelines) > 1 diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 4d74f08add3358..78e1fccdeb0e28 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -957,7 +957,7 @@ def test_folding_with_utf8_encoding_8(self): b'123456789 123456789 123456789 123456789 ' b'123456789-123456789\n 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') - + def test_folding_with_short_nospace_1(self): # bpo-36520 @@ -969,12 +969,12 @@ def test_folding_with_short_nospace_1(self): m['Message-ID'] = '12345678912345678123456789123456789123456789' parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) - + def test_folding_with_long_nospace_default_policy_1(self): # Fixed: https://github.com/python/cpython/issues/124452 - # - # When the value is too long, it should be converted back - # to its original form without any modifications. + # + # When the value is too long, it should be converted back + # to its original form without any modifications. m = EmailMessage(policy.default) m['Message-ID'] = '12345678912345678123456789123456789123456789'\ @@ -988,9 +988,9 @@ def test_folding_with_long_nospace_default_policy_1(self): def test_folding_with_long_nospace_compat32_policy_1(self): # Fixed: https://github.com/python/cpython/issues/124452 - # - # When the value is too long, it should be converted back - # to its original form without any modifications. + # + # When the value is too long, it should be converted back + # to its original form without any modifications. m = EmailMessage(policy.compat32) m['Message-ID'] = '12345678912345678123456789123456789123456789'\ @@ -998,7 +998,6 @@ def test_folding_with_long_nospace_compat32_policy_1(self): parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) - def test_get_body_malformed(self): """test for bpo-42892""" msg = textwrap.dedent("""\ diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst index eac6224a9484ed..9a4f29f90cdc24 100644 --- a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -1,3 +1,3 @@ -Fix the mismatched email headers after converting them to bytes and parsing them into an email message. +Fix the mismatched email headers after converting them to bytes and parsing them into an email message. This issue only occurs with long text without any spaces. -Root cause: The email library doesn't know how to wrap long text without spaces, but it wraps it anyway with a leading WSP, which is not removed when converting bytes to EmailMessage. +Root cause: The email library doesn't know how to wrap long text without spaces, but it wraps it anyway with a leading WSP, which is not removed when converting bytes to EmailMessage. From 16c3d11046f2d8493f2137a583248ffd68b8faeb Mon Sep 17 00:00:00 2001 From: RanKKI Date: Thu, 24 Oct 2024 21:58:58 +1100 Subject: [PATCH 05/14] revert: imports --- Lib/email/policy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index a15a8cb557aedb..ade287393abdb7 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -4,11 +4,11 @@ import re import sys -from email._policybase import Compat32, Policy, _extend_docstrings, compat32 -from email.contentmanager import raw_data_manager +from email._policybase import Policy, Compat32, compat32, _extend_docstrings +from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry +from email.contentmanager import raw_data_manager from email.message import EmailMessage -from email.utils import _has_surrogates __all__ = [ 'Compat32', From 7e4093853cf4248cd4d79ff4787ae1632e0a95c9 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Fri, 25 Oct 2024 07:22:04 +1100 Subject: [PATCH 06/14] refactor: simplify condition check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/email/policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/policy.py b/Lib/email/policy.py index ade287393abdb7..4051022b375da3 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -125,7 +125,7 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) - no_first_value = value.strip() == '' and len(sourcelines) > 1 + no_first_value = not value.strip() and len(sourcelines) > 1 value = '' if no_first_value else value.lstrip(' \t') From 0c1073001ce97df12fc6c18063463425f0f5da4a Mon Sep 17 00:00:00 2001 From: RanKKI Date: Fri, 25 Oct 2024 08:25:13 +1100 Subject: [PATCH 07/14] refactor: simplify code --- Lib/email/_policybase.py | 24 +----------------------- Lib/email/policy.py | 9 +-------- Lib/test/test_email/test_message.py | 15 ++++++--------- 3 files changed, 8 insertions(+), 40 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 88f268b4868fdd..b9a900e1a6c483 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -307,29 +307,7 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) - - # Fixed: https://github.com/python/cpython/issues/124452 - # - # Root cause: The function '_refold_parse_tree' in '_header_value_parse.py'. - # If there is no WSP, it can't figure out how to wrap the text. - # Therefore, it places the entire value directly after '\n', and because - # there is a WSP after ':', the WSP will be moved to the front - # of the value according to RFC5322, section 2.2.3. - # - # However, the WSP is not part of the value; therefore, we must - # remove it. - - no_first_value = value.strip() == '' and len(sourcelines) > 1 - - # When using the compat32 policy, the value is '\n'. Therefore, - # use an empty string if there is no value (without WSP and CRLF) - # on the first line - value = '' if no_first_value else value.lstrip(' \t') - - if no_first_value and sourcelines[1][0] in ' \t': - sourcelines[1] = sourcelines[1][1:] - - value += ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 4051022b375da3..3b83e6c11a113f 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -125,14 +125,7 @@ def header_source_parse(self, sourcelines): """ name, value = sourcelines[0].split(':', 1) - no_first_value = not value.strip() and len(sourcelines) > 1 - - value = '' if no_first_value else value.lstrip(' \t') - - if no_first_value and sourcelines[1][0] in ' \t': - sourcelines[1] = sourcelines[1][1:] - - value += ''.join(sourcelines[1:]) + value = ''.join((value, *sourcelines[1:])).lstrip(' \t\r\n') return (name, value.rstrip('\r\n')) def header_store_parse(self, name, value): diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 78e1fccdeb0e28..314e5b9cc5a0a3 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -958,7 +958,6 @@ def test_folding_with_utf8_encoding_8(self): b'123456789-123456789\n 123456789 Hello ' b'=?utf-8?q?W=C3=B6rld!?= 123456789 123456789\n\n') - def test_folding_with_short_nospace_1(self): # bpo-36520 # @@ -966,7 +965,7 @@ def test_folding_with_short_nospace_1(self): # the fold point. m = EmailMessage(policy.default) - m['Message-ID'] = '12345678912345678123456789123456789123456789' + m['Message-ID'] = '123456789'*3 parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) @@ -977,12 +976,10 @@ def test_folding_with_long_nospace_default_policy_1(self): # to its original form without any modifications. m = EmailMessage(policy.default) - m['Message-ID'] = '12345678912345678123456789123456789123456789'\ - '12345678912345678123456789123456789123456789' + message = '123456789' * 10 + m['Message-ID'] = message self.assertEqual(m.as_bytes(), - b'Message-ID:\n 12345678912345678123456789123456'\ - b'78912345678912345678912345678123456789123456789'\ - b'123456789\n\n') + f'Message-ID:\n {message}\n\n'.encode()) parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) @@ -993,8 +990,8 @@ def test_folding_with_long_nospace_compat32_policy_1(self): # to its original form without any modifications. m = EmailMessage(policy.compat32) - m['Message-ID'] = '12345678912345678123456789123456789123456789'\ - '12345678912345678123456789123456789123456789' + message = '123456789' * 10 + m['Message-ID'] = message parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) From b21d326c6d2935d3bde405da9018d62a382541b8 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Fri, 25 Oct 2024 08:28:02 +1100 Subject: [PATCH 08/14] test: SMTP & HTTP policy --- Lib/test/test_email/test_message.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 314e5b9cc5a0a3..49782050028e4e 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -984,17 +984,26 @@ def test_folding_with_long_nospace_default_policy_1(self): self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) def test_folding_with_long_nospace_compat32_policy_1(self): - # Fixed: https://github.com/python/cpython/issues/124452 - # - # When the value is too long, it should be converted back - # to its original form without any modifications. - m = EmailMessage(policy.compat32) message = '123456789' * 10 m['Message-ID'] = message parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_folding_with_long_nospace_smtp_policy_1(self): + m = EmailMessage(policy.SMTP) + message = '123456789' * 10 + m['Message-ID'] = message + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + + def test_folding_with_long_nospace_http_policy_1(self): + m = EmailMessage(policy.HTTP) + message = '123456789' * 10 + m['Message-ID'] = message + parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) + self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) + def test_get_body_malformed(self): """test for bpo-42892""" msg = textwrap.dedent("""\ From 7f824cc15c4fe596c1707ebdccea25288d5c85ff Mon Sep 17 00:00:00 2001 From: RanKKI Date: Fri, 25 Oct 2024 08:29:08 +1100 Subject: [PATCH 09/14] docs: update News --- .../Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst index 9a4f29f90cdc24..43212285fb3141 100644 --- a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -1,3 +1 @@ -Fix the mismatched email headers after converting them to bytes and parsing them into an email message. -This issue only occurs with long text without any spaces. -Root cause: The email library doesn't know how to wrap long text without spaces, but it wraps it anyway with a leading WSP, which is not removed when converting bytes to EmailMessage. +Fix email parsing bug that introduced spurious leading whitespace into header values when the header includes a newline character after the ';' and before the value. From da06939de45c98a28303fb591c677b93c28d6bcb Mon Sep 17 00:00:00 2001 From: RanKKI Date: Fri, 25 Oct 2024 21:42:25 +1100 Subject: [PATCH 10/14] docs: use markup text in news --- .../next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst index 43212285fb3141..ab7c15ca4e219b 100644 --- a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -1 +1 @@ -Fix email parsing bug that introduced spurious leading whitespace into header values when the header includes a newline character after the ';' and before the value. +Fix :mod:`email` parsing bug that introduced spurious leading whitespace into header values when the header includes a newline character after the ';' and before the value. From d865575672f799b9758c4e13e756b10b165106a1 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Sat, 26 Oct 2024 10:04:44 +1100 Subject: [PATCH 11/14] docs: update comments --- Lib/email/_policybase.py | 2 +- Lib/email/policy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index b9a900e1a6c483..4b63b97217a835 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -302,7 +302,7 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. """ diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 3b83e6c11a113f..6e109b65011a44 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -119,7 +119,7 @@ def header_source_parse(self, sourcelines): """+ The name is parsed as everything up to the ':' and returned unmodified. The value is determined by stripping leading whitespace off the - remainder of the first line, joining all subsequent lines together, and + remainder of the first line joined with all subsequent lines, and stripping any trailing carriage return or linefeed characters. (This is the same as Compat32). From 5bcc15b56f32460eefae351a87285796239472b7 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Sun, 27 Oct 2024 10:13:58 +1100 Subject: [PATCH 12/14] Update Lib/test/test_email/test_message.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_email/test_message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_email/test_message.py b/Lib/test/test_email/test_message.py index 49782050028e4e..96979db27f3a21 100644 --- a/Lib/test/test_email/test_message.py +++ b/Lib/test/test_email/test_message.py @@ -965,7 +965,7 @@ def test_folding_with_short_nospace_1(self): # the fold point. m = EmailMessage(policy.default) - m['Message-ID'] = '123456789'*3 + m['Message-ID'] = '123456789' * 3 parsed_msg = message_from_bytes(m.as_bytes(), policy=policy.default) self.assertEqual(parsed_msg['Message-ID'], m['Message-ID']) From 5f6490838f040ea0e77c379a744e4b989b525b5b Mon Sep 17 00:00:00 2001 From: RanKKI Date: Sun, 27 Oct 2024 12:15:18 +1100 Subject: [PATCH 13/14] Update Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst index ab7c15ca4e219b..c77c0aff18820f 100644 --- a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -1 +1,4 @@ -Fix :mod:`email` parsing bug that introduced spurious leading whitespace into header values when the header includes a newline character after the ';' and before the value. +Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and +:meth:`email.policy.Compat32.header_source_parse` that introduced spurious +leading whitespaces into header values when the header includes a newline +character after the header name delimiter (`:`) and before the value. From b1d3a40acbbff8d0f4ccab2ef3e86751ce811813 Mon Sep 17 00:00:00 2001 From: RanKKI Date: Sun, 27 Oct 2024 13:15:16 +1100 Subject: [PATCH 14/14] fix: lint --- .../next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst index c77c0aff18820f..b0d63794022db4 100644 --- a/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst +++ b/Misc/NEWS.d/next/Library/2024-10-24-10-49-47.gh-issue-124452.eqTRgx.rst @@ -1,4 +1,4 @@ Fix an issue in :meth:`email.policy.EmailPolicy.header_source_parse` and :meth:`email.policy.Compat32.header_source_parse` that introduced spurious leading whitespaces into header values when the header includes a newline -character after the header name delimiter (`:`) and before the value. +character after the header name delimiter (``:``) and before the value.