From 1326d2a808245e5f2de9e515460bab30556e8f05 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 4 Nov 2025 17:49:44 +0200 Subject: [PATCH 1/7] gh-140979: Fix off-by-one error in the RE code validator (GH-140984) It was too lenient and allowed MARK opcodes with too large value. --- Modules/_sre/sre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index fdf00e6499cb6b..4e97101b699876 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -1946,7 +1946,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) sre_match() code is robust even if they don't, and the worst you can get is nonsensical match results. */ GET_ARG; - if (arg > 2 * (size_t)groups + 1) { + if (arg >= 2 * (size_t)groups) { VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); FAIL; } From 40096da95a592ac0b2ad6aa9c731631784c3b393 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 4 Nov 2025 08:31:35 -0800 Subject: [PATCH 2/7] GH-139946: Colorize error and warning messages in argparse (#140695) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Lib/_colorize.py | 3 ++ Lib/argparse.py | 22 ++++++++-- Lib/test/test_argparse.py | 41 +++++++++++++++++++ Lib/test/test_clinic.py | 2 + Lib/test/test_gzip.py | 3 +- Lib/test/test_uuid.py | 4 +- Lib/test/test_webbrowser.py | 2 + ...-10-28-02-46-56.gh-issue-139946.aN3_uY.rst | 1 + 8 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-10-28-02-46-56.gh-issue-139946.aN3_uY.rst diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 63e951d6488547..57b712bc068d4e 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -170,6 +170,9 @@ class Argparse(ThemeSection): label: str = ANSIColors.BOLD_YELLOW action: str = ANSIColors.BOLD_GREEN reset: str = ANSIColors.RESET + error: str = ANSIColors.BOLD_MAGENTA + warning: str = ANSIColors.BOLD_YELLOW + message: str = ANSIColors.MAGENTA @dataclass(frozen=True, kw_only=True) diff --git a/Lib/argparse.py b/Lib/argparse.py index 1f4413a9897eeb..6b79747572f48f 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -2749,6 +2749,14 @@ def _print_message(self, message, file=None): except (AttributeError, OSError): pass + def _get_theme(self, file=None): + from _colorize import can_colorize, get_theme + + if self.color and can_colorize(file=file): + return get_theme(force_color=True).argparse + else: + return get_theme(force_no_color=True).argparse + # =============== # Exiting methods # =============== @@ -2768,13 +2776,21 @@ def error(self, message): should either exit or raise an exception. """ self.print_usage(_sys.stderr) + theme = self._get_theme(file=_sys.stderr) + fmt = _('%(prog)s: error: %(message)s\n') + fmt = fmt.replace('error: %(message)s', + f'{theme.error}error:{theme.reset} {theme.message}%(message)s{theme.reset}') + args = {'prog': self.prog, 'message': message} - self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + self.exit(2, fmt % args) def _warning(self, message): + theme = self._get_theme(file=_sys.stderr) + fmt = _('%(prog)s: warning: %(message)s\n') + fmt = fmt.replace('warning: %(message)s', + f'{theme.warning}warning:{theme.reset} {theme.message}%(message)s{theme.reset}') args = {'prog': self.prog, 'message': message} - self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) - + self._print_message(fmt % args, _sys.stderr) def __getattr__(name): if name == "__version__": diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index d6c9c1ef2c81e8..3a8be68a5468b0 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2283,6 +2283,7 @@ class TestNegativeNumber(ParserTestCase): ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] +@force_not_colorized_test_class class TestArgumentAndSubparserSuggestions(TestCase): """Test error handling and suggestion when a user makes a typo""" @@ -6147,6 +6148,7 @@ def spam(string_to_convert): # Check that deprecated arguments output warning # ============================================== +@force_not_colorized_test_class class TestDeprecatedArguments(TestCase): def test_deprecated_option(self): @@ -7370,6 +7372,45 @@ def test_subparser_prog_is_stored_without_color(self): help_text = demo_parser.format_help() self.assertNotIn('\x1b[', help_text) + def test_error_and_warning_keywords_colorized(self): + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('foo') + + with self.assertRaises(SystemExit): + with captured_stderr() as stderr: + parser.parse_args([]) + + err = stderr.getvalue() + error_color = self.theme.error + reset = self.theme.reset + self.assertIn(f'{error_color}error:{reset}', err) + + with captured_stderr() as stderr: + parser._warning('test warning') + + warn = stderr.getvalue() + warning_color = self.theme.warning + self.assertIn(f'{warning_color}warning:{reset}', warn) + + def test_error_and_warning_not_colorized_when_disabled(self): + parser = argparse.ArgumentParser(prog='PROG', color=False) + parser.add_argument('foo') + + with self.assertRaises(SystemExit): + with captured_stderr() as stderr: + parser.parse_args([]) + + err = stderr.getvalue() + self.assertNotIn('\x1b[', err) + self.assertIn('error:', err) + + with captured_stderr() as stderr: + parser._warning('test warning') + + warn = stderr.getvalue() + self.assertNotIn('\x1b[', warn) + self.assertIn('warning:', warn) + class TestModule(unittest.TestCase): def test_deprecated__version__(self): diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index e0dbb062eb0372..e71f9fc181bb43 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -4,6 +4,7 @@ from functools import partial from test import support, test_tools +from test.support import force_not_colorized_test_class from test.support import os_helper from test.support.os_helper import TESTFN, unlink, rmtree from textwrap import dedent @@ -2758,6 +2759,7 @@ def test_allow_negative_accepted_by_py_ssize_t_converter_only(self): with self.assertRaisesRegex((AssertionError, TypeError), errmsg): self.parse_function(block) +@force_not_colorized_test_class class ClinicExternalTest(TestCase): maxDiff = None diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index f14a882d386866..442d30fc970fa9 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -11,7 +11,7 @@ import unittest from subprocess import PIPE, Popen from test.support import catch_unraisable_exception -from test.support import import_helper +from test.support import force_not_colorized_test_class, import_helper from test.support import os_helper from test.support import _4G, bigmemtest, requires_subprocess from test.support.script_helper import assert_python_ok, assert_python_failure @@ -1057,6 +1057,7 @@ def wrapper(*args, **kwargs): return decorator +@force_not_colorized_test_class class TestCommandLine(unittest.TestCase): data = b'This is a simple test with gzip' diff --git a/Lib/test/test_uuid.py b/Lib/test/test_uuid.py index 33045a78721aac..5f9ab048cdeb6c 100755 --- a/Lib/test/test_uuid.py +++ b/Lib/test/test_uuid.py @@ -13,7 +13,7 @@ from unittest import mock from test import support -from test.support import import_helper, warnings_helper +from test.support import force_not_colorized_test_class, import_helper, warnings_helper from test.support.script_helper import assert_python_ok py_uuid = import_helper.import_fresh_module('uuid', blocked=['_uuid']) @@ -1250,10 +1250,12 @@ def test_cli_uuid8(self): self.do_test_standalone_uuid(8) +@force_not_colorized_test_class class TestUUIDWithoutExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = py_uuid +@force_not_colorized_test_class @unittest.skipUnless(c_uuid, 'requires the C _uuid module') class TestUUIDWithExtModule(CommandLineTestCases, BaseTestUUID, unittest.TestCase): uuid = c_uuid diff --git a/Lib/test/test_webbrowser.py b/Lib/test/test_webbrowser.py index 6b577ae100e419..20d347168b3af8 100644 --- a/Lib/test/test_webbrowser.py +++ b/Lib/test/test_webbrowser.py @@ -7,6 +7,7 @@ import unittest import webbrowser from test import support +from test.support import force_not_colorized_test_class from test.support import import_helper from test.support import is_apple_mobile from test.support import os_helper @@ -503,6 +504,7 @@ def test_environment_preferred(self): self.assertEqual(webbrowser.get().name, sys.executable) +@force_not_colorized_test_class class CliTest(unittest.TestCase): def test_parse_args(self): for command, url, new_win in [ diff --git a/Misc/NEWS.d/next/Library/2025-10-28-02-46-56.gh-issue-139946.aN3_uY.rst b/Misc/NEWS.d/next/Library/2025-10-28-02-46-56.gh-issue-139946.aN3_uY.rst new file mode 100644 index 00000000000000..4c68d4cd94bf78 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-10-28-02-46-56.gh-issue-139946.aN3_uY.rst @@ -0,0 +1 @@ +Error and warning keywords in ``argparse.ArgumentParser`` messages are now colorized when color output is enabled, fixing a visual inconsistency in which they remained plain text while other output was colorized. From 8a7dbb7a68b5da1f3f1805f564c028f1eea4ebc3 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Tue, 4 Nov 2025 10:28:17 -0800 Subject: [PATCH 3/7] Document that returning `sys.monitoring.DISABLE` in response to a global event raises `ValueError` (#140726) Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- Doc/library/sys.monitoring.rst | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 0f986aa580b3c9..303655fb128b37 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -216,14 +216,17 @@ by another event: The :monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events are controlled by the :monitoring-event:`CALL` event. -:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be seen if the -corresponding :monitoring-event:`CALL` event is being monitored. +:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be +seen if the corresponding :monitoring-event:`CALL` event is being monitored. + + +.. _monitoring-event-global: Other events '''''''''''' Other events are not necessarily tied to a specific location in the -program and cannot be individually disabled. +program and cannot be individually disabled via :data:`DISABLE`. The other events that can be monitored are: @@ -289,12 +292,13 @@ in Python (see :ref:`c-api-monitoring`). .. function:: get_local_events(tool_id: int, code: CodeType, /) -> int - Returns all the local events for *code* + Returns all the :ref:`local events ` for *code* .. function:: set_local_events(tool_id: int, code: CodeType, event_set: int, /) -> None - Activates all the local events for *code* which are set in *event_set*. - Raises a :exc:`ValueError` if *tool_id* is not in use. + Activates all the :ref:`local events ` for *code* + which are set in *event_set*. Raises a :exc:`ValueError` if *tool_id* is not + in use. Disabling events @@ -305,15 +309,21 @@ Disabling events A special value that can be returned from a callback function to disable events for the current code location. -Local events can be disabled for a specific code location by returning -:data:`sys.monitoring.DISABLE` from a callback function. This does not change -which events are set, or any other code locations for the same event. +:ref:`Local events ` can be disabled for a specific code +location by returning :data:`sys.monitoring.DISABLE` from a callback function. +This does not change which events are set, or any other code locations for the +same event. Disabling events for specific locations is very important for high performance monitoring. For example, a program can be run under a debugger with no overhead if the debugger disables all monitoring except for a few breakpoints. +If :data:`DISABLE` is returned by a callback for a +:ref:`global event `, :exc:`ValueError` will be raised +by the interpreter in a non-specific location (that is, no traceback will be +provided). + .. function:: restart_events() -> None Enable all the events that were disabled by :data:`sys.monitoring.DISABLE` From 66c86c65633047c0faffba85ce6b0b3a82373657 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Tue, 4 Nov 2025 18:29:44 +0000 Subject: [PATCH 4/7] gh-134817: Restore accidentally deleted line in documentation. (GH-141013) --- Doc/library/logging.handlers.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index d74ef73ee28497..c9cfbdb4126fda 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -463,6 +463,7 @@ timed intervals. .. method:: getFilesToDelete() Returns a list of filenames which should be deleted as part of rollover. These + are the absolute paths of the oldest backup log files written by the handler. .. method:: shouldRollover(record) From 97d8dda980fcddf88b782be343118257f483a864 Mon Sep 17 00:00:00 2001 From: Guo Ci Date: Tue, 4 Nov 2025 14:29:13 -0500 Subject: [PATCH 5/7] Docs: Fix typo in `email.headerregistry.rst` (#140965) Fix missing 'Header' suffix on header class name in `email.headerregistry.rst` --- Doc/library/email.headerregistry.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/email.headerregistry.rst b/Doc/library/email.headerregistry.rst index 7f8044932fae99..ff8b601fe3d1bb 100644 --- a/Doc/library/email.headerregistry.rst +++ b/Doc/library/email.headerregistry.rst @@ -294,7 +294,7 @@ variant, :attr:`~.BaseHeader.max_count` is set to 1. ``inline`` and ``attachment`` are the only valid values in common use. -.. class:: ContentTransferEncoding +.. class:: ContentTransferEncodingHeader Handles the :mailheader:`Content-Transfer-Encoding` header. From ce1bb85d286130f44b7e874430b0b12990d61dc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20=C5=81ajszczak?= Date: Tue, 4 Nov 2025 20:46:07 +0100 Subject: [PATCH 6/7] gh-139434: Update selected RFC 2822 references to RFC 5322 (#139435) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update selected RFC 2822 references to RFC 5322 RFC 2822 was obsoleted by RFC 5322 in 2008. This updates references to use the current standard in documentation, docstrings, and comments. It preserves RFC 2822 references in legacy API components to maintain their historical context. RFC 822 → RFC 2822 → RFC 5322 progression is explained where relevant. In some places specific sections of RFC are referenced where it seems helpful. Scout rule was applied in some places and RFC mentions format was normalized in doc strings and comments. --- Doc/library/http.client.rst | 2 +- Doc/library/http.server.rst | 2 +- Doc/library/mailbox.rst | 2 +- Doc/library/time.rst | 5 ++-- Doc/tutorial/stdlib.rst | 2 +- Lib/email/_parseaddr.py | 13 +++++++---- Lib/email/_policybase.py | 2 +- Lib/email/feedparser.py | 4 ++-- Lib/email/generator.py | 2 +- Lib/email/message.py | 2 +- Lib/email/parser.py | 10 ++++---- Lib/http/client.py | 4 ++-- Lib/smtplib.py | 6 ++--- Lib/test/test_email/data/msg_35.txt | 2 +- Lib/test/test_email/test_email.py | 36 ++++++++++++++--------------- 15 files changed, 49 insertions(+), 45 deletions(-) diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index 589152f29686b6..7c258b324d9867 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -133,7 +133,7 @@ This module provides the following function: Parse the headers from a file pointer *fp* representing a HTTP request/response. The file has to be a :class:`~io.BufferedIOBase` reader - (i.e. not text) and must provide a valid :rfc:`2822` style header. + (i.e. not text) and must provide a valid :rfc:`5322` style header. This function returns an instance of :class:`http.client.HTTPMessage` that holds the header fields, but no payload diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 063344e0284258..58f09634f95e0f 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -154,7 +154,7 @@ instantiation, of which this module provides three different variants: variable. This instance parses and manages the headers in the HTTP request. The :func:`~http.client.parse_headers` function from :mod:`http.client` is used to parse the headers and it requires that the - HTTP request provide a valid :rfc:`2822` style header. + HTTP request provide a valid :rfc:`5322` style header. .. attribute:: rfile diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index e8a96f29ea185e..62e289573c0c7e 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -917,7 +917,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. copied; furthermore, any format-specific information is converted insofar as possible if *message* is a :class:`!Message` instance. If *message* is a string, a byte string, - or a file, it should contain an :rfc:`2822`\ -compliant message, which is read + or a file, it should contain an :rfc:`5322`\ -compliant message, which is read and parsed. Files should be open in binary mode, but text mode files are accepted for backward compatibility. diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 350ffade7af5fa..69e6433e898703 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -584,7 +584,7 @@ Functions calculations when the day of the week and the year are specified. Here is an example, a format for dates compatible with that specified in the - :rfc:`2822` Internet email standard. [1]_ :: + :rfc:`5322` Internet email standard. [1]_ :: >>> from time import gmtime, strftime >>> strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) @@ -1066,4 +1066,5 @@ Timezone Constants strict reading of the original 1982 :rfc:`822` standard calls for a two-digit year (``%y`` rather than ``%Y``), but practice moved to 4-digit years long before the year 2000. After that, :rfc:`822` became obsolete and the 4-digit year has - been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`. + been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`, + with :rfc:`5322` continuing this requirement. diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index d83ecca270b160..49a3e370a4c018 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -335,7 +335,7 @@ sophisticated and robust capabilities of its larger packages. For example: names, no direct knowledge or handling of XML is needed. * The :mod:`email` package is a library for managing email messages, including - MIME and other :rfc:`2822`-based message documents. Unlike :mod:`smtplib` and + MIME and other :rfc:`5322`-based message documents. Unlike :mod:`smtplib` and :mod:`poplib` which actually send and receive messages, the email package has a complete toolset for building or decoding complex message structures (including attachments) and for implementing internet encoding and header diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 84917038874ba1..6a7c5fa06d20b6 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -146,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). @@ -233,9 +234,11 @@ def __init__(self, field): self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies '.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 95e79b8938bb4c..e23843df44881f 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -380,7 +380,7 @@ def _fold(self, name, value, sanitize): h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the - # default value of 78, as recommended by RFC 2822. + # default value of 78, as recommended by RFC 5322 section 2.1.1. maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 9d80a5822af48d..6479b9bab7ac5b 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -32,7 +32,7 @@ NLCRE_bol = re.compile(r'(\r\n|\r|\n)') NLCRE_eol = re.compile(r'(\r\n|\r|\n)\z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' @@ -294,7 +294,7 @@ def _parsegen(self): return if self._cur.get_content_maintype() == 'message': # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. + # another RFC 5322 message. for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ab5bd0653e440c..03524c96559153 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *, expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, diff --git a/Lib/email/message.py b/Lib/email/message.py index 4380e0ec50b46a..641fb2e944d431 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -141,7 +141,7 @@ def _decode_uu(encoded): class Message: """Basic message object. - A message object is defined as something that has a bunch of RFC 2822 + A message object is defined as something that has a bunch of RFC 5322 headers and a payload. It may optionally have an envelope header (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a multipart or a message/rfc822), then the payload is a list of Message diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 039f03cba74fa0..c6a51dd8e377b8 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -2,7 +2,7 @@ # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -15,13 +15,13 @@ class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header + The string must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -75,13 +75,13 @@ def parsestr(self, text, headersonly=True): class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header + The input must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/Lib/http/client.py b/Lib/http/client.py index 425d9bdad8cdb5..4b9a61cfc1159f 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -231,7 +231,7 @@ def _read_headers(fp, max_headers): def _parse_header_lines(header_lines, _class=HTTPMessage): """ - Parses only RFC2822 headers from header lines. + Parses only RFC 5322 headers from header lines. email Parser wants to see strings rather than bytes. But a TextIOWrapper around self.rfile would buffer too many bytes @@ -244,7 +244,7 @@ def _parse_header_lines(header_lines, _class=HTTPMessage): return email.parser.Parser(_class=_class).parsestr(hstring) def parse_headers(fp, _class=HTTPMessage, *, _max_headers=None): - """Parses only RFC2822 headers from a file pointer.""" + """Parses only RFC 5322 headers from a file pointer.""" headers = _read_headers(fp, _max_headers) return _parse_header_lines(headers, _class) diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 808f0fd47e8b4e..72093f7f8b0f2d 100644 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -917,7 +917,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, The arguments are as for sendmail, except that msg is an email.message.Message object. If from_addr is None or to_addrs is None, these arguments are taken from the headers of the Message as - described in RFC 2822 (a ValueError is raised if there is more than + described in RFC 5322 (a ValueError is raised if there is more than one set of 'Resent-' headers). Regardless of the values of from_addr and to_addr, any Bcc field (or Resent-Bcc field, when the Message is a resent) of the Message object won't be transmitted. The Message @@ -931,7 +931,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, policy. """ - # 'Resent-Date' is a mandatory field if the Message is resent (RFC 2822 + # 'Resent-Date' is a mandatory field if the Message is resent (RFC 5322 # Section 3.6.6). In such a case, we use the 'Resent-*' fields. However, # if there is more than one 'Resent-' block there's no way to # unambiguously determine which one is the most recent in all cases, @@ -950,7 +950,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, else: raise ValueError("message has more than one 'Resent-' header block") if from_addr is None: - # Prefer the sender field per RFC 2822:3.6.2. + # Prefer the sender field per RFC 5322 section 3.6.2. from_addr = (msg[header_prefix + 'Sender'] if (header_prefix + 'Sender') in msg else msg[header_prefix + 'From']) diff --git a/Lib/test/test_email/data/msg_35.txt b/Lib/test/test_email/data/msg_35.txt index be7d5a2f7b9d38..0e2bbcaf71816a 100644 --- a/Lib/test/test_email/data/msg_35.txt +++ b/Lib/test/test_email/data/msg_35.txt @@ -1,4 +1,4 @@ From: aperson@dom.ain To: bperson@dom.ain Subject: here's something interesting -counter to RFC 2822, there's no separating newline here +counter to RFC 5322, there's no separating newline here diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b458d3f0efaabd..4cd587bcd76040 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -2373,7 +2373,7 @@ def test_no_separating_blank_line(self): To: bperson@dom.ain Subject: here's something interesting -counter to RFC 2822, there's no separating newline here +counter to RFC 5322, there's no separating newline here """) # test_defect_handling @@ -2529,49 +2529,49 @@ def test_rfc2047_Q_invalid_digits(self): [(b'andr\xe9=zz', 'iso-8859-1')]) def test_rfc2047_rfc2047_1(self): - # 1st testcase at end of rfc2047 + # 1st testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_2(self): - # 2nd testcase at end of rfc2047 + # 2nd testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= b)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) def test_rfc2047_rfc2047_3(self): - # 3rd testcase at end of rfc2047 + # 3rd testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_4(self): - # 4th testcase at end of rfc2047 + # 4th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_5a(self): - # 5th testcase at end of rfc2047 newline is \r\n + # 5th testcase at end of RFC 2047 newline is \r\n s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_5b(self): - # 5th testcase at end of rfc2047 newline is \n + # 5th testcase at end of RFC 2047 newline is \n s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_6(self): - # 6th testcase at end of rfc2047 + # 6th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a_b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_7(self): - # 7th testcase at end of rfc2047 + # 7th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), @@ -3273,8 +3273,8 @@ def test_parsedate_y2k(self): """Test for parsing a date with a two-digit year. Parsing a date with a two-digit year should return the correct - four-digit year. RFC822 allows two-digit years, but RFC2822 (which - obsoletes RFC822) requires four-digit years. + four-digit year. RFC 822 allows two-digit years, but RFC 5322 (which + obsoletes RFC 2822, which obsoletes RFC 822) requires four-digit years. """ self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), @@ -3325,7 +3325,7 @@ def test_escape_backslashes(self): self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) def test_quotes_unicode_names(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -3335,7 +3335,7 @@ def test_quotes_unicode_names(self): latin1_quopri) def test_accepts_any_charset_like_object(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -3350,7 +3350,7 @@ def header_encode(self, string): utf8_base64) def test_invalid_charset_like_object_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' # An object without a header_encode method: @@ -3359,7 +3359,7 @@ def test_invalid_charset_like_object_raises_error(self): bad_charset) def test_unicode_address_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. addr = 'pers\u00f6n@dom.in' self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) @@ -3380,7 +3380,7 @@ def test_parseaddr_preserves_quoted_pairs_in_addresses(self): # string containing a quoted backslash, followed by 'example' and two # backslashes, followed by another quoted string containing a space and # the word 'example'. parseaddr copies those two backslashes - # literally. Per rfc5322 this is not technically correct since a \ may + # literally. Per RFC 5322 this is not technically correct since a \ may # not appear in an address outside of a quoted string. It is probably # a sensible Postel interpretation, though. eq = self.assertEqual @@ -3392,12 +3392,12 @@ def test_parseaddr_preserves_quoted_pairs_in_addresses(self): ('', '"\\\\"example\\\\" example"@example.com')) def test_parseaddr_preserves_spaces_in_local_part(self): - # issue 9286. A normal RFC5322 local part should not contain any + # issue 9286. A normal RFC 5322 local part should not contain any # folding white space, but legacy local parts can (they are a sequence # of atoms, not dotatoms). On the other hand we strip whitespace from # before the @ and around dots, on the assumption that the whitespace # around the punctuation is a mistake in what would otherwise be - # an RFC5322 local part. Leading whitespace is, usual, stripped as well. + # an RFC 5322 local part. Leading whitespace is, usual, stripped as well. self.assertEqual(('', "merwok wok@xample.com"), utils.parseaddr("merwok wok@xample.com")) self.assertEqual(('', "merwok wok@xample.com"), From bfe54810c408ff066591d1af0411b1d9c10084b1 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Tue, 4 Nov 2025 21:19:06 +0000 Subject: [PATCH 7/7] gh-141004: Document `Py_UNICODE_{HIGH, LOW}_SURROGATE` functions (GH-141019) --- Doc/c-api/unicode.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 22b0a6aff6e02e..ca7c8bb11a5d78 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -321,12 +321,22 @@ These APIs can be used to work with surrogates: Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``). +.. c:function:: Py_UCS4 Py_UNICODE_HIGH_SURROGATE(Py_UCS4 ch) + + Return the high UTF-16 surrogate (``0xD800`` to ``0xDBFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + +.. c:function:: Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) + + Return the low UTF-16 surrogate (``0xDC00`` to ``0xDFFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + .. c:function:: Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) Join two surrogate code points and return a single :c:type:`Py_UCS4` value. *high* and *low* are respectively the leading and trailing surrogates in a - surrogate pair. *high* must be in the range [0xD800; 0xDBFF] and *low* must - be in the range [0xDC00; 0xDFFF]. + surrogate pair. *high* must be in the range ``[0xD800; 0xDBFF]`` and *low* must + be in the range ``[0xDC00; 0xDFFF]``. Creating and accessing Unicode strings