From 983ae79532e7b624b8e096bb56b1b3ef17983643 Mon Sep 17 00:00:00 2001 From: PuQing Date: Sun, 22 Jun 2025 18:48:29 +0800 Subject: [PATCH 001/212] fix instructions in __annotate__ have incorrect code positions --- Python/codegen.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Python/codegen.c b/Python/codegen.c index 0023d72cd5e91d..a8744a533e63c7 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -685,13 +685,14 @@ codegen_setup_annotations_scope(compiler *c, location loc, PyObject *value_with_fake_globals = PyLong_FromLong(_Py_ANNOTATE_FORMAT_VALUE_WITH_FAKE_GLOBALS); assert(!SYMTABLE_ENTRY(c)->ste_has_docstring); _Py_DECLARE_STR(format, ".format"); - ADDOP_I(c, loc, LOAD_FAST, 0); - ADDOP_LOAD_CONST(c, loc, value_with_fake_globals); - ADDOP_I(c, loc, COMPARE_OP, (Py_GT << 5) | compare_masks[Py_GT]); + + ADDOP_I(c, NO_LOCATION, LOAD_FAST, 0); + ADDOP_LOAD_CONST(c, NO_LOCATION, value_with_fake_globals); + ADDOP_I(c, NO_LOCATION, COMPARE_OP, (Py_GT << 5) | compare_masks[Py_GT]); NEW_JUMP_TARGET_LABEL(c, body); - ADDOP_JUMP(c, loc, POP_JUMP_IF_FALSE, body); - ADDOP_I(c, loc, LOAD_COMMON_CONSTANT, CONSTANT_NOTIMPLEMENTEDERROR); - ADDOP_I(c, loc, RAISE_VARARGS, 1); + ADDOP_JUMP(c, NO_LOCATION, POP_JUMP_IF_FALSE, body); + ADDOP_I(c, NO_LOCATION, LOAD_COMMON_CONSTANT, CONSTANT_NOTIMPLEMENTEDERROR); + ADDOP_I(c, NO_LOCATION, RAISE_VARARGS, 1); USE_LABEL(c, body); return SUCCESS; } @@ -750,7 +751,7 @@ codegen_deferred_annotations_body(compiler *c, location loc, assert(PyList_CheckExact(conditional_annotation_indices)); assert(annotations_len == PyList_Size(conditional_annotation_indices)); - ADDOP_I(c, loc, BUILD_MAP, 0); // stack now contains + ADDOP_I(c, NO_LOCATION, BUILD_MAP, 0); // stack now contains for (Py_ssize_t i = 0; i < annotations_len; i++) { PyObject *ptr = PyList_GET_ITEM(deferred_anno, i); From dd3bea2b0f05ba96237e14a10aba171eede84458 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 22 Jun 2025 10:47:28 +0000 Subject: [PATCH 002/212] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by?= =?UTF-8?q?=20blurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst new file mode 100644 index 00000000000000..6b1b9ddea2ab79 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst @@ -0,0 +1 @@ +Fix instructions in __annotate__ have incorrect code positions From c2f0a4d3c5c94fb865aaf4d52d8dd491dedb9c6b Mon Sep 17 00:00:00 2001 From: PuQing Date: Sun, 22 Jun 2025 20:13:25 +0800 Subject: [PATCH 003/212] Update Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst Co-authored-by: sobolevn --- .../2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst index 6b1b9ddea2ab79..dec0f3291c5125 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-22-10-47-27.gh-issue-135700.0qdtCl.rst @@ -1 +1 @@ -Fix instructions in __annotate__ have incorrect code positions +Fix instructions positions in :attr:`~object.__annotate__`. From b79060ef63156aff1d072dbb5fc89776ad2a5691 Mon Sep 17 00:00:00 2001 From: PuQing Date: Sun, 22 Jun 2025 23:08:09 +0800 Subject: [PATCH 004/212] add unit tests --- Lib/test/test_dis.py | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 355990ed58ee09..e03924ae4d564d 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -431,6 +431,40 @@ def foo(a: int, b: str) -> str: RETURN_VALUE """ +issue_135700 = """\ +22 +333 +__dataclass_fields__: ClassVar +""" + +class _B: + 2 + c: int + +dis_issue_135700_class = """\ +Disassembly of __annotate_func__: + -- COPY_FREE_VARS 1 + + %3d RESUME 0 + LOAD_FAST_BORROW 0 (format) + LOAD_SMALL_INT 2 + COMPARE_OP 132 (>) + POP_JUMP_IF_FALSE 3 (to L1) + NOT_TAKEN + LOAD_COMMON_CONSTANT 1 (NotImplementedError) + RAISE_VARARGS 1 + L1: BUILD_MAP 0 + + %3d LOAD_DEREF 1 (__classdict__) + LOAD_FROM_DICT_OR_GLOBALS 0 (int) + COPY 2 + LOAD_CONST 1 ('c') + + %3d STORE_SUBSCR + RETURN_VALUE + +""" % (_B.__firstlineno__, _B.__firstlineno__ + 2, _B.__firstlineno__) + compound_stmt_str = """\ x = 0 while 1: @@ -1124,6 +1158,24 @@ def test_bug_46724(self): # Test that negative operargs are handled properly self.do_disassembly_test(bug46724, dis_bug46724) + def test_annotate_no_spurious_first_node_positions(self): + # Test that __annotate__ code doesn't inherit first AST node positions + annotate_code = compile(issue_135700, "", "exec").co_consts[1] + instructions = list(dis.Bytecode(annotate_code)) + + spurious_positions = 0 + for instr in instructions: + if (instr.positions and + instr.positions.lineno == 1 and + instr.positions.col_offset == 0 and + instr.positions.end_col_offset == 1): + spurious_positions += 1 + + self.assertEqual(spurious_positions, 0, + "No instructions should have first statement's position") + got = self.get_disassembly(_B, depth=1) + self.do_disassembly_compare(got, dis_issue_135700_class) + def test_kw_names(self): # Test that value is displayed for keyword argument names: self.do_disassembly_test(wrap_func_w_kwargs, dis_kw_names) From 2201864f4dbb1c5136bd864ed947c202e83bdfc9 Mon Sep 17 00:00:00 2001 From: PuQing Date: Mon, 23 Jun 2025 10:48:31 +0800 Subject: [PATCH 005/212] fix test --- Lib/test/test_dis.py | 60 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index e03924ae4d564d..1302b865c7734e 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -437,33 +437,11 @@ def foo(a: int, b: str) -> str: __dataclass_fields__: ClassVar """ +issue_135700_class = """\ class _B: 2 c: int - -dis_issue_135700_class = """\ -Disassembly of __annotate_func__: - -- COPY_FREE_VARS 1 - - %3d RESUME 0 - LOAD_FAST_BORROW 0 (format) - LOAD_SMALL_INT 2 - COMPARE_OP 132 (>) - POP_JUMP_IF_FALSE 3 (to L1) - NOT_TAKEN - LOAD_COMMON_CONSTANT 1 (NotImplementedError) - RAISE_VARARGS 1 - L1: BUILD_MAP 0 - - %3d LOAD_DEREF 1 (__classdict__) - LOAD_FROM_DICT_OR_GLOBALS 0 (int) - COPY 2 - LOAD_CONST 1 ('c') - - %3d STORE_SUBSCR - RETURN_VALUE - -""" % (_B.__firstlineno__, _B.__firstlineno__ + 2, _B.__firstlineno__) +""" compound_stmt_str = """\ x = 0 @@ -1160,21 +1138,25 @@ def test_bug_46724(self): def test_annotate_no_spurious_first_node_positions(self): # Test that __annotate__ code doesn't inherit first AST node positions - annotate_code = compile(issue_135700, "", "exec").co_consts[1] - instructions = list(dis.Bytecode(annotate_code)) - - spurious_positions = 0 - for instr in instructions: - if (instr.positions and - instr.positions.lineno == 1 and - instr.positions.col_offset == 0 and - instr.positions.end_col_offset == 1): - spurious_positions += 1 - - self.assertEqual(spurious_positions, 0, - "No instructions should have first statement's position") - got = self.get_disassembly(_B, depth=1) - self.do_disassembly_compare(got, dis_issue_135700_class) + test_cases = [ + ("module", compile(issue_135700, "", "exec").co_consts[1]), + ("class", compile(ast.parse(issue_135700_class), "?", "exec").co_consts[0].co_consts[1]) + ] + + for case_name, annotate_code in test_cases: + with self.subTest(case=case_name): + instructions = list(dis.Bytecode(annotate_code)) + + spurious_positions = sum( + 1 for instr in instructions + if (instr.positions and + instr.positions.lineno == 1 and + instr.positions.col_offset == 0 and + instr.positions.end_col_offset == 1) + ) + + self.assertEqual(spurious_positions, 0, + f"No instructions in {case_name} __annotate__ should have first statement's position") def test_kw_names(self): # Test that value is displayed for keyword argument names: From aed06b7a3d2a8f2ee2746768207ed2eb28bbcf8f Mon Sep 17 00:00:00 2001 From: PuQing Date: Mon, 23 Jun 2025 19:01:14 +0800 Subject: [PATCH 006/212] fix test --- Lib/test/test_dis.py | 53 +++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 1302b865c7734e..435bb7f02fef68 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -431,18 +431,6 @@ def foo(a: int, b: str) -> str: RETURN_VALUE """ -issue_135700 = """\ -22 -333 -__dataclass_fields__: ClassVar -""" - -issue_135700_class = """\ -class _B: - 2 - c: int -""" - compound_stmt_str = """\ x = 0 while 1: @@ -1138,25 +1126,44 @@ def test_bug_46724(self): def test_annotate_no_spurious_first_node_positions(self): # Test that __annotate__ code doesn't inherit first AST node positions + issue_135700 = "1\nx: int" + issue_135700_class = "class A:\n 1\n x: int" + test_cases = [ ("module", compile(issue_135700, "", "exec").co_consts[1]), - ("class", compile(ast.parse(issue_135700_class), "?", "exec").co_consts[0].co_consts[1]) + ( + "class", + compile(ast.parse(issue_135700_class), "?", "exec") + .co_consts[0] + .co_consts[1], + ), ] for case_name, annotate_code in test_cases: with self.subTest(case=case_name): instructions = list(dis.Bytecode(annotate_code)) - - spurious_positions = sum( - 1 for instr in instructions - if (instr.positions and - instr.positions.lineno == 1 and - instr.positions.col_offset == 0 and - instr.positions.end_col_offset == 1) + print(instructions) + resume_pos = next( + ( + inst.positions + for inst in instructions + if inst.opname == "RESUME" + ), + None, ) - - self.assertEqual(spurious_positions, 0, - f"No instructions in {case_name} __annotate__ should have first statement's position") + for instruction in instructions: + if instruction.opname == "BUILD_MAP": + break + if ( + instruction.opname != "RESUME" + and instruction.positions + and instruction.positions.lineno + ): + self.assertEqual( + instruction.positions, + resume_pos, + f"{case_name}: Unexpected position {instruction.positions} in {instruction.opname}, expected {resume_pos}", + ) def test_kw_names(self): # Test that value is displayed for keyword argument names: From d85247e8e447211407ddb1b4c33095699dad5527 Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 24 Jun 2025 14:34:37 +0800 Subject: [PATCH 007/212] Update Lib/test/test_dis.py Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Lib/test/test_dis.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 435bb7f02fef68..d80f5fccc6b1da 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1124,7 +1124,7 @@ def test_bug_46724(self): # Test that negative operargs are handled properly self.do_disassembly_test(bug46724, dis_bug46724) - def test_annotate_no_spurious_first_node_positions(self): + def test_annotate_source_locations(self): # Test that __annotate__ code doesn't inherit first AST node positions issue_135700 = "1\nx: int" issue_135700_class = "class A:\n 1\n x: int" From eca9248cf58b7073321a87567c39f673331f2502 Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 24 Jun 2025 14:35:14 +0800 Subject: [PATCH 008/212] Apply suggestions from code review Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Lib/test/test_dis.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index d80f5fccc6b1da..ce12539a1ff351 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1125,7 +1125,7 @@ def test_bug_46724(self): self.do_disassembly_test(bug46724, dis_bug46724) def test_annotate_source_locations(self): - # Test that __annotate__ code doesn't inherit first AST node positions + # See gh-135700 issue_135700 = "1\nx: int" issue_135700_class = "class A:\n 1\n x: int" @@ -1142,7 +1142,6 @@ def test_annotate_source_locations(self): for case_name, annotate_code in test_cases: with self.subTest(case=case_name): instructions = list(dis.Bytecode(annotate_code)) - print(instructions) resume_pos = next( ( inst.positions From e14cb292ba21fff3d675b9444ecc42f705cc1851 Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 24 Jun 2025 16:33:14 +0800 Subject: [PATCH 009/212] fix test --- Lib/test/test_dis.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index ce12539a1ff351..02c4ca8b1ba5f6 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1141,28 +1141,25 @@ def test_annotate_source_locations(self): for case_name, annotate_code in test_cases: with self.subTest(case=case_name): - instructions = list(dis.Bytecode(annotate_code)) - resume_pos = next( - ( - inst.positions - for inst in instructions - if inst.opname == "RESUME" - ), - None, + line_starts_iterator = dis.findlinestarts(annotate_code) + valid_line_starts = [ + item[0] + for item in line_starts_iterator + if item[1] is not None + ] # The first item is not RESUME in class case + setup_scope_begin = valid_line_starts[0] + setup_scope_end = valid_line_starts[1] + setup_annotations_scope_positions = { + instr.positions + for instr in dis.get_instructions(annotate_code) + if setup_scope_begin <= instr.offset < setup_scope_end + and instr.positions + } + self.assertEqual( + len(setup_annotations_scope_positions), + 1, + f"{case_name}: Expected uniform positions, found {len(setup_annotations_scope_positions)}: {setup_annotations_scope_positions}", ) - for instruction in instructions: - if instruction.opname == "BUILD_MAP": - break - if ( - instruction.opname != "RESUME" - and instruction.positions - and instruction.positions.lineno - ): - self.assertEqual( - instruction.positions, - resume_pos, - f"{case_name}: Unexpected position {instruction.positions} in {instruction.opname}, expected {resume_pos}", - ) def test_kw_names(self): # Test that value is displayed for keyword argument names: From f216c825c724b3d942e17185471787ca3d5885ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 16:59:57 +0200 Subject: [PATCH 010/212] gh-135239: simpler use of mutexes in cryptographic modules (#135267) --- Lib/test/test_hashlib.py | 75 ++++++---- Modules/_hashopenssl.c | 81 ++++------- Modules/blake2module.c | 57 +++----- Modules/hashlib.h | 99 +++++++++----- Modules/hmacmodule.c | 288 +++++++++------------------------------ Modules/md5module.c | 54 +++----- Modules/sha1module.c | 53 +++---- Modules/sha2module.c | 136 +++++++----------- Modules/sha3module.c | 60 +++----- 9 files changed, 322 insertions(+), 581 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index e77343f827811a..6e4e41e4fbb4f1 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1043,49 +1043,67 @@ def test_gil(self): def test_sha256_gil(self): gil_minsize = hashlib_helper.find_gil_minsize(['_sha2', '_hashlib']) + data = b'1' + b'#' * gil_minsize + b'1' + expected = hashlib.sha256(data).hexdigest() + m = hashlib.sha256() m.update(b'1') m.update(b'#' * gil_minsize) m.update(b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) + self.assertEqual(m.hexdigest(), expected) - m = hashlib.sha256(b'1' + b'#' * gil_minsize + b'1') - self.assertEqual( - m.hexdigest(), - '1cfceca95989f51f658e3f3ffe7f1cd43726c9e088c13ee10b46f57cef135b94' - ) + @threading_helper.reap_threads + @threading_helper.requires_working_threading() + def test_threaded_hashing_fast(self): + # Same as test_threaded_hashing_slow() but only tests some functions + # since otherwise test_hashlib.py becomes too slow during development. + for name in ['md5', 'sha1', 'sha256', 'sha3_256', 'blake2s']: + if constructor := getattr(hashlib, name, None): + with self.subTest(name): + self.do_test_threaded_hashing(constructor, is_shake=False) + if shake_128 := getattr(hashlib, 'shake_128', None): + self.do_test_threaded_hashing(shake_128, is_shake=True) + @requires_resource('cpu') @threading_helper.reap_threads @threading_helper.requires_working_threading() - def test_threaded_hashing(self): + def test_threaded_hashing_slow(self): + for algorithm, constructors in self.constructors_to_test.items(): + is_shake = algorithm in self.shakes + for constructor in constructors: + with self.subTest(constructor.__name__, is_shake=is_shake): + self.do_test_threaded_hashing(constructor, is_shake) + + def do_test_threaded_hashing(self, constructor, is_shake): # Updating the same hash object from several threads at once # using data chunk sizes containing the same byte sequences. # # If the internal locks are working to prevent multiple # updates on the same object from running at once, the resulting # hash will be the same as doing it single threaded upfront. - hasher = hashlib.sha1() - num_threads = 5 - smallest_data = b'swineflu' - data = smallest_data * 200000 - expected_hash = hashlib.sha1(data*num_threads).hexdigest() - - def hash_in_chunks(chunk_size): - index = 0 - while index < len(data): - hasher.update(data[index:index + chunk_size]) - index += chunk_size + + # The data to hash has length s|M|q^N and the chunk size for the i-th + # thread is s|M|q^(N-i), where N is the number of threads, M is a fixed + # message of small length, and s >= 1 and q >= 2 are small integers. + smallest_size, num_threads, s, q = 8, 5, 2, 10 + + smallest_data = os.urandom(smallest_size) + data = s * smallest_data * (q ** num_threads) + + h1 = constructor(usedforsecurity=False) + h2 = constructor(data * num_threads, usedforsecurity=False) + + def update(chunk_size): + for index in range(0, len(data), chunk_size): + h1.update(data[index:index + chunk_size]) threads = [] - for threadnum in range(num_threads): - chunk_size = len(data) // (10 ** threadnum) + for thread_num in range(num_threads): + # chunk_size = len(data) // (q ** thread_num) + chunk_size = s * smallest_size * q ** (num_threads - thread_num) self.assertGreater(chunk_size, 0) - self.assertEqual(chunk_size % len(smallest_data), 0) - thread = threading.Thread(target=hash_in_chunks, - args=(chunk_size,)) + self.assertEqual(chunk_size % smallest_size, 0) + thread = threading.Thread(target=update, args=(chunk_size,)) threads.append(thread) for thread in threads: @@ -1093,7 +1111,10 @@ def hash_in_chunks(chunk_size): for thread in threads: thread.join() - self.assertEqual(expected_hash, hasher.hexdigest()) + if is_shake: + self.assertEqual(h1.hexdigest(16), h2.hexdigest(16)) + else: + self.assertEqual(h1.hexdigest(), h2.hexdigest()) def test_get_fips_mode(self): fips_mode = self.is_fips_mode diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 20eba9dd77bafb..0fe69ee8fac835 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -278,21 +278,15 @@ get_hashlib_state(PyObject *module) } typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* OpenSSL context lock */ } HASHobject; #define HASHobject_CAST(op) ((HASHobject *)(op)) typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD HMAC_CTX *ctx; /* OpenSSL hmac context */ - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; /* HMAC context lock */ } HMACobject; #define HMACobject_CAST(op) ((HMACobject *)(op)) @@ -700,9 +694,9 @@ static int _hashlib_HASH_copy_locked(HASHobject *self, EVP_MD_CTX *new_ctx_p) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = EVP_MD_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (result == 0) { notify_smart_ssl_error_occurred_in(Py_STRINGIFY(EVP_MD_CTX_copy)); return -1; @@ -802,27 +796,13 @@ _hashlib_HASH_update_impl(HASHobject *self, PyObject *obj) { int result; Py_buffer view; - GET_BUFFER_VIEW_OR_ERROUT(obj, &view); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - result = _hashlib_HASH_hash(self, view.buf, view.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - result = _hashlib_HASH_hash(self, view.buf, view.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); PyBuffer_Release(&view); - - if (result == -1) - return NULL; - Py_RETURN_NONE; + return result < 0 ? NULL : Py_None; } static PyMethodDef HASH_methods[] = { @@ -1144,15 +1124,12 @@ _hashlib_HASH(PyObject *module, const char *digestname, PyObject *data_obj, } if (view.buf && view.len) { - if (view.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - result = _hashlib_HASH_hash(self, view.buf, view.len); - Py_END_ALLOW_THREADS - } else { - result = _hashlib_HASH_hash(self, view.buf, view.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + view.len, + result = _hashlib_HASH_hash(self, view.buf, view.len) + ); if (result == -1) { assert(PyErr_Occurred()); Py_CLEAR(self); @@ -1813,9 +1790,9 @@ static int locked_HMAC_CTX_copy(HMAC_CTX *new_ctx_p, HMACobject *self) { int result; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); result = HMAC_CTX_copy(new_ctx_p, self->ctx); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (result == 0) { notify_smart_ssl_error_occurred_in(Py_STRINGIFY(HMAC_CTX_copy)); return -1; @@ -1846,24 +1823,12 @@ _hmac_update(HMACobject *self, PyObject *obj) Py_buffer view = {0}; GET_BUFFER_VIEW_OR_ERROR(obj, &view, return 0); - - if (!self->use_mutex && view.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - r = HMAC_Update(self->ctx, - (const unsigned char *)view.buf, - (size_t)view.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, view.len, + r = HMAC_Update( + self->ctx, (const unsigned char *)view.buf, (size_t)view.len + ) + ); PyBuffer_Release(&view); if (r == 0) { diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 2ce8c0cd3d7b6f..295bca07650916 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -352,7 +352,7 @@ type_to_impl(PyTypeObject *type) } typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; @@ -364,8 +364,6 @@ typedef struct { #endif }; blake2_impl impl; - bool use_mutex; - PyMutex mutex; } Blake2Object; #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) @@ -422,7 +420,7 @@ new_Blake2Object(PyTypeObject *type) } while (0) static void -update(Blake2Object *self, uint8_t *buf, Py_ssize_t len) +blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) { switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since @@ -646,14 +644,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, if (data != NULL) { Py_buffer buf; GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { - Py_BEGIN_ALLOW_THREADS - update(self, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(self, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -744,7 +740,7 @@ py_blake2s_new_impl(PyTypeObject *type, PyObject *data_obj, int digest_size, } static int -blake2_blake2b_copy_locked(Blake2Object *self, Blake2Object *cpy) +blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) { assert(cpy != NULL); #define BLAKE2_COPY(TYPE, STATE_ATTR) \ @@ -801,9 +797,9 @@ _blake2_blake2b_copy_impl(Blake2Object *self) return NULL; } - ENTER_HASHLIB(self); - rc = blake2_blake2b_copy_locked(self, cpy); - LEAVE_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); + rc = blake2_blake2b_copy_unlocked(self, cpy); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(cpy); return NULL; @@ -825,25 +821,12 @@ _blake2_blake2b_update_impl(Blake2Object *self, PyObject *data) /*[clinic end generated code: output=99330230068e8c99 input=ffc4aa6a6a225d31]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } - else { - update(self, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + blake2_update_unlocked(self, buf.buf, buf.len) + ); PyBuffer_Release(&buf); - Py_RETURN_NONE; } @@ -881,9 +864,9 @@ _blake2_blake2b_digest_impl(Blake2Object *self) /*[clinic end generated code: output=31ab8ad477f4a2f7 input=7d21659e9c5fff02]*/ { uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); digest_length = blake2_blake2b_compute_digest(self, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, digest_length); } @@ -898,9 +881,9 @@ _blake2_blake2b_hexdigest_impl(Blake2Object *self) /*[clinic end generated code: output=5ef54b138db6610a input=76930f6946351f56]*/ { uint8_t digest_length = 0, digest[HACL_HASH_BLAKE2B_OUT_BYTES]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); digest_length = blake2_blake2b_compute_digest(self, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, digest_length); } diff --git a/Modules/hashlib.h b/Modules/hashlib.h index e82ec92be25c57..9a7e72f34a7f9d 100644 --- a/Modules/hashlib.h +++ b/Modules/hashlib.h @@ -34,47 +34,78 @@ /* * Helper code to synchronize access to the hash object when the GIL is - * released around a CPU consuming hashlib operation. All code paths that - * access a mutable part of obj must be enclosed in an ENTER_HASHLIB / - * LEAVE_HASHLIB block or explicitly acquire and release the lock inside - * a PY_BEGIN / END_ALLOW_THREADS block if they wish to release the GIL for - * an operation. + * released around a CPU consuming hashlib operation. * - * These only drop the GIL if the lock acquisition itself is likely to - * block. Thus the non-blocking acquire gating the GIL release for a - * blocking lock acquisition. The intent of these macros is to surround - * the assumed always "fast" operations that you aren't releasing the - * GIL around. Otherwise use code similar to what you see in hash - * function update() methods. + * Code accessing a mutable part of the hash object must be enclosed in + * an HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release + * the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if + * they wish to release the GIL for an operation. */ -#include "pythread.h" -#define ENTER_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Lock(&(obj)->mutex); \ - } -#define LEAVE_HASHLIB(obj) \ - if ((obj)->use_mutex) { \ - PyMutex_Unlock(&(obj)->mutex); \ - } +#define HASHLIB_OBJECT_HEAD \ + PyObject_HEAD \ + /* Guard against race conditions during incremental update(). */ \ + PyMutex mutex; -#ifdef Py_GIL_DISABLED -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = true; \ +#define HASHLIB_INIT_MUTEX(OBJ) \ + do { \ + (OBJ)->mutex = (PyMutex){0}; \ } while (0) -#else -#define HASHLIB_INIT_MUTEX(obj) \ - do { \ - (obj)->mutex = (PyMutex){0}; \ - (obj)->use_mutex = false; \ + +#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex) +#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex) + +/* + * Message length above which the GIL is to be released + * when performing hashing operations. + */ +#define HASHLIB_GIL_MINSIZE 2048 + +// Macros for executing code while conditionally holding the GIL. +// +// These only drop the GIL if the lock acquisition itself is likely to +// block. Thus the non-blocking acquire gating the GIL release for a +// blocking lock acquisition. The intent of these macros is to surround +// the assumed always "fast" operations that you aren't releasing the +// GIL around. + +/* + * Execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. + */ +#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + STATEMENTS; \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + STATEMENTS; \ + } \ } while (0) -#endif -/* TODO(gpshead): We should make this a module or class attribute - * to allow the user to optimize based on the platform they're using. */ -#define HASHLIB_GIL_MINSIZE 2048 +/* + * Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'. + * + * The GIL is held if 'SIZE' is below the HASHLIB_GIL_MINSIZE threshold. + */ +#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \ + do { \ + if ((SIZE) > HASHLIB_GIL_MINSIZE) { \ + Py_BEGIN_ALLOW_THREADS \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + Py_END_ALLOW_THREADS \ + } \ + else { \ + HASHLIB_ACQUIRE_LOCK(OBJ); \ + STATEMENTS; \ + HASHLIB_RELEASE_LOCK(OBJ); \ + } \ + } while (0) static inline int _Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string) diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index f6ed35ae742905..e7a5ccbb19b45c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -215,105 +215,6 @@ typedef struct py_hmac_hacl_api { #define Py_CHECK_HACL_UINT32_T_LENGTH(LEN) #endif -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * The magnitude of 'LEN' is not checked and thus 'LEN' must be - * safely convertible to a uint32_t value. - */ -#define Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN) \ - Hacl_Streaming_HMAC_update(HACL_STATE, BUF, (uint32_t)(LEN)) - -/* - * Call the HACL* HMAC-HASH update function on the given data. - * - * On DEBUG builds, the 'ERRACTION' statements are executed if - * the update() call returned a non-successful HACL* exit code. - * - * The buffer 'BUF' and its length 'LEN' are left untouched. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, uint32_t, (C statements)) - */ -#ifndef NDEBUG -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - Py_CHECK_HACL_UINT32_T_LENGTH(LEN); \ - hacl_errno_t code = Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, LEN); \ - if (_hacl_convert_errno(code) < 0) { \ - ERRACTION; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_ONCE( \ - HACL_STATE, BUF, LEN, \ - _ERRACTION \ -) \ - do { \ - (void)Py_HMAC_HACL_UPDATE_CALL(HACL_STATE, BUF, (LEN)); \ - } while (0) -#endif - -/* - * Repetivively call the HACL* HMAC-HASH update function on the given - * data until the buffer length 'LEN' is strictly less than UINT32_MAX. - * - * On builds with PY_SSIZE_T_MAX <= UINT32_MAX, this is a no-op. - * - * The buffer 'BUF' (resp. 'LEN') is advanced (resp. decremented) - * by UINT32_MAX after each update. On DEBUG builds, each update() - * call is verified and the 'ERRACTION' statements are executed if - * a non-successful HACL* exit code is being returned. - * - * In particular, 'BUF' and 'LEN' must be variable names and not - * expressions on their own. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) - */ -#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - while ((Py_ssize_t)LEN > UINT32_MAX_AS_SSIZE_T) { \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, UINT32_MAX, \ - ERRACTION); \ - BUF += UINT32_MAX; \ - LEN -= UINT32_MAX; \ - } \ - } while (0) -#else -#define Py_HMAC_HACL_UPDATE_LOOP( \ - HACL_STATE, BUF, LEN, \ - _ERRACTION \ -) -#endif - -/* - * Perform the HMAC-HASH update() operation in a streaming fashion. - * - * The formal signature of this macro is: - * - * (HACL_HMAC_state *, uint8_t *, C integer, (C statements)) - */ -#define Py_HMAC_HACL_UPDATE( \ - HACL_STATE, BUF, LEN, \ - ERRACTION \ -) \ - do { \ - Py_HMAC_HACL_UPDATE_LOOP(HACL_STATE, BUF, LEN, \ - ERRACTION); \ - Py_HMAC_HACL_UPDATE_ONCE(HACL_STATE, BUF, LEN, \ - ERRACTION); \ - } while (0) - /* * HMAC underlying hash function static information. */ @@ -382,11 +283,7 @@ get_hmacmodule_state_by_cls(PyTypeObject *cls) typedef Hacl_Streaming_HMAC_agile_state HACL_HMAC_state; typedef struct HMACObject { - PyObject_HEAD - - bool use_mutex; - PyMutex mutex; - + HASHLIB_OBJECT_HEAD // Hash function information PyObject *name; // rendered name (exact unicode object) HMAC_Hash_Kind kind; // can be used for runtime dispatch (must be known) @@ -556,6 +453,51 @@ _hacl_hmac_state_free(HACL_HMAC_state *state) } } +/* + * Call the HACL* HMAC-HASH update function on the given data. + * + * On DEBUG builds, the update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. +*/ +static int +_hacl_hmac_state_update_once(HACL_HMAC_state *state, + uint8_t *buf, uint32_t len) +{ +#ifndef NDEBUG + hacl_errno_t code = Hacl_Streaming_HMAC_update(state, buf, len); + return _hacl_convert_errno(code); +#else + (void)Hacl_Streaming_HMAC_update(state, buf, len); + return 0; +#endif +} + +/* + * Perform the HMAC-HASH update() operation in a streaming fashion. + * + * On DEBUG builds, each update() call is verified. + * + * Return 0 on success; otherwise, set an exception and return -1 on failure. + */ +static int +_hacl_hmac_state_update(HACL_HMAC_state *state, uint8_t *buf, Py_ssize_t len) +{ + assert(len >= 0); +#ifdef Py_HMAC_SSIZE_LARGER_THAN_UINT32 + while (len > UINT32_MAX_AS_SSIZE_T) { + if (_hacl_hmac_state_update_once(state, buf, UINT32_MAX) < 0) { + assert(PyErr_Occurred()); + return -1; + } + buf += UINT32_MAX; + len -= UINT32_MAX; + } +#endif + Py_CHECK_HACL_UINT32_T_LENGTH(len); + return _hacl_hmac_state_update_once(state, buf, (uint32_t)len); +} + /* Static information used to construct the hash table. */ static const py_hmac_hinfo py_hmac_static_hinfo[] = { #define Py_HMAC_HINFO_HACL_API(HACL_HID) \ @@ -786,45 +728,6 @@ hmac_new_initial_state(HMACObject *self, uint8_t *key, Py_ssize_t len) return self->state == NULL ? -1 : 0; } -/* - * Feed initial data. - * - * This function MUST only be called by the HMAC object constructor - * and after hmac_set_hinfo() and hmac_new_initial_state() have been - * called, lest the behaviour is undefined. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_feed_initial_data(HMACObject *self, uint8_t *msg, Py_ssize_t len) -{ - assert(self->name != NULL); - assert(self->state != NULL); - if (len == 0) { - // do nothing if the buffer is empty - return 0; - } - - if (len < HASHLIB_GIL_MINSIZE) { - Py_HMAC_HACL_UPDATE(self->state, msg, len, return -1); - return 0; - } - - int res = 0; - Py_BEGIN_ALLOW_THREADS - Py_HMAC_HACL_UPDATE(self->state, msg, len, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - Py_END_ALLOW_THREADS - return res; -} - /*[clinic input] _hmac.new @@ -871,7 +774,12 @@ _hmac_new_impl(PyObject *module, PyObject *keyobj, PyObject *msgobj, if (msgobj != NULL && msgobj != Py_None) { Py_buffer msg; GET_BUFFER_VIEW_OR_ERROR(msgobj, &msg, goto error); - rc = hmac_feed_initial_data(self, msg.buf, msg.len); + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + msg.len, + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ); PyBuffer_Release(&msg); #ifndef NDEBUG if (rc < 0) { @@ -948,12 +856,12 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); /* copy hash information */ hmac_copy_hinfo(copy, self); /* copy internal state */ int rc = hmac_copy_state(copy, self); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(copy); @@ -965,78 +873,6 @@ _hmac_HMAC_copy_impl(HMACObject *self, PyTypeObject *cls) return (PyObject *)copy; } -/* - * Update the HMAC object with the given buffer. - * - * This unconditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_with_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - int res = 0; - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); // unconditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); - goto done; -#ifndef NDEBUG -error: - res = -1; -#else - Py_UNREACHABLE(); -#endif -done: - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - return res; -} - -/* - * Update the HMAC object with the given buffer. - * - * This conditionally acquires the lock on the HMAC object. - * - * On DEBUG builds, each update() call is verified. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static int -hmac_update_state_cond_lock(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - ENTER_HASHLIB(self); // conditionally acquire a lock - Py_HMAC_HACL_UPDATE(self->state, buf, len, goto error); - LEAVE_HASHLIB(self); - return 0; - -#ifndef NDEBUG -error: - LEAVE_HASHLIB(self); - return -1; -#else - Py_UNREACHABLE(); -#endif -} - -/* - * Update the internal HMAC state with the given buffer. - * - * Return 0 on success; otherwise, set an exception and return -1 on failure. - */ -static inline int -hmac_update_state(HMACObject *self, uint8_t *buf, Py_ssize_t len) -{ - assert(buf != 0); - assert(len >= 0); - return len == 0 - ? 0 /* nothing to do */ - : len < HASHLIB_GIL_MINSIZE - ? hmac_update_state_cond_lock(self, buf, len) - : hmac_update_state_with_lock(self, buf, len); -} - /*[clinic input] _hmac.HMAC.update @@ -1049,9 +885,13 @@ static PyObject * _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) /*[clinic end generated code: output=962134ada5e55985 input=7c0ea830efb03367]*/ { + int rc = 0; Py_buffer msg; GET_BUFFER_VIEW_OR_ERROUT(msgobj, &msg); - int rc = hmac_update_state(self, msg.buf, msg.len); + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, msg.len, + rc = _hacl_hmac_state_update(self->state, msg.buf, msg.len) + ); PyBuffer_Release(&msg); return rc < 0 ? NULL : Py_None; } @@ -1067,13 +907,13 @@ _hmac_HMAC_update_impl(HMACObject *self, PyObject *msgobj) * Note: this function may raise a MemoryError. */ static int -hmac_digest_compute_cond_lock(HMACObject *self, uint8_t *digest) +hmac_digest_compute_locked(HMACObject *self, uint8_t *digest) { assert(digest != NULL); hacl_errno_t rc; - ENTER_HASHLIB(self); // conditionally acquire a lock + HASHLIB_ACQUIRE_LOCK(self); rc = Hacl_Streaming_HMAC_digest(self->state, digest, self->digest_size); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); assert( rc == Hacl_Streaming_Types_Success || rc == Hacl_Streaming_Types_OutOfMemory @@ -1095,7 +935,7 @@ _hmac_HMAC_digest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return PyBytes_FromStringAndSize((const char *)digest, self->digest_size); @@ -1118,7 +958,7 @@ _hmac_HMAC_hexdigest_impl(HMACObject *self) { assert(self->digest_size <= Py_hmac_hash_max_digest_size); uint8_t digest[Py_hmac_hash_max_digest_size]; - if (hmac_digest_compute_cond_lock(self, digest) < 0) { + if (hmac_digest_compute_locked(self, digest) < 0) { return NULL; } return _Py_strhex((const char *)digest, self->digest_size); diff --git a/Modules/md5module.c b/Modules/md5module.c index 6ec0ee87a2f761..5dac3a91f4f09d 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -38,12 +38,8 @@ class MD5Type "MD5object *" "&PyType_Type" #include "_hacl/Hacl_Hash_MD5.h" - typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_OBJECT_HEAD Hacl_Hash_MD5_state_t *hash_state; } MD5object; @@ -118,9 +114,9 @@ MD5Type_copy_impl(MD5object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_MD5_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -139,9 +135,9 @@ MD5Type_digest_impl(MD5object *self) /*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/ { uint8_t digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE); } @@ -156,9 +152,9 @@ MD5Type_hexdigest_impl(MD5object *self) /*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/ { uint8_t digest[MD5_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_MD5_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, MD5_DIGESTSIZE); } @@ -170,6 +166,7 @@ update(Hacl_Hash_MD5_state_t *state, uint8_t *buf, Py_ssize_t len) * take more than 1 billion years to overflow the maximum admissible length * for MD5 (2^61 - 1). */ + assert(len >= 0); #if PY_SSIZE_T_MAX > UINT32_MAX while (len > UINT32_MAX) { (void)Hacl_Hash_MD5_update(state, buf, UINT32_MAX); @@ -195,22 +192,11 @@ MD5Type_update_impl(MD5object *self, PyObject *obj) /*[clinic end generated code: output=b0fed9a7ce7ad253 input=6e1efcd9ecf17032]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -312,16 +298,12 @@ _md5_md5_impl(PyObject *module, PyObject *data, int usedforsecurity, } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update(new->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha1module.c b/Modules/sha1module.c index a746bf74f8d4c1..3bc83b674f1bc1 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -38,11 +38,7 @@ class SHA1Type "SHA1object *" "&PyType_Type" #include "_hacl/Hacl_Hash_SHA1.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; - PyThread_type_lock lock; + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA1_state_t *hash_state; } SHA1object; @@ -121,9 +117,9 @@ SHA1Type_copy_impl(SHA1object *self, PyTypeObject *cls) return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA1_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -142,9 +138,9 @@ SHA1Type_digest_impl(SHA1object *self) /*[clinic end generated code: output=2f05302a7aa2b5cb input=13824b35407444bd]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, SHA1_DIGESTSIZE); } @@ -159,9 +155,9 @@ SHA1Type_hexdigest_impl(SHA1object *self) /*[clinic end generated code: output=4161fd71e68c6659 input=97691055c0c74ab0]*/ { unsigned char digest[SHA1_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA1_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, SHA1_DIGESTSIZE); } @@ -198,22 +194,11 @@ SHA1Type_update_impl(SHA1object *self, PyObject *obj) /*[clinic end generated code: output=cdc8e0e106dbec5f input=aad8e07812edbba3]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -314,16 +299,12 @@ _sha1_sha1_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update(new->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update(new->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update(new->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 72931910c5d720..6643b7e6b02654 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -50,20 +50,14 @@ class SHA512Type "SHA512object *" "&PyType_Type" // TODO: Get rid of int digestsize in favor of Hacl state info? typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_256 *state; } SHA256object; typedef struct { - PyObject_HEAD + HASHLIB_OBJECT_HEAD int digestsize; - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; Hacl_Hash_SHA2_state_t_512 *state; } SHA512object; @@ -272,9 +266,9 @@ SHA256Type_copy_impl(SHA256object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA256copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -309,9 +303,9 @@ SHA512Type_copy_impl(SHA512object *self, PyTypeObject *cls) } } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); rc = SHA512copy(self, newobj); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (rc < 0) { Py_DECREF(newobj); return NULL; @@ -331,11 +325,11 @@ SHA256Type_digest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -351,11 +345,11 @@ SHA512Type_digest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); // HACL* performs copies under the hood so that self->state remains valid // after this call. Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, self->digestsize); } @@ -371,9 +365,9 @@ SHA256Type_hexdigest_impl(SHA256object *self) { uint8_t digest[SHA256_DIGESTSIZE]; assert(self->digestsize <= SHA256_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_256(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -389,9 +383,9 @@ SHA512Type_hexdigest_impl(SHA512object *self) { uint8_t digest[SHA512_DIGESTSIZE]; assert(self->digestsize <= SHA512_DIGESTSIZE); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); Hacl_Hash_SHA2_digest_512(self->state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, self->digestsize); } @@ -409,22 +403,11 @@ SHA256Type_update_impl(SHA256object *self, PyObject *obj) /*[clinic end generated code: output=dc58a580cf8905a5 input=b2d449d5b30f0f5a]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_256(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update_256(self->state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update_256(self->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -443,22 +426,11 @@ SHA512Type_update_impl(SHA512object *self, PyObject *obj) /*[clinic end generated code: output=9af211766c0b7365 input=ded2b46656566283]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(obj, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - update_512(self->state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - update_512(self->state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + update_512(self->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -638,16 +610,12 @@ _sha2_sha256_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_256(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -700,16 +668,12 @@ _sha2_sha224_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_256(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_256(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_256(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -763,16 +727,12 @@ _sha2_sha512_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_512(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_512(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } @@ -826,16 +786,12 @@ _sha2_sha384_impl(PyObject *module, PyObject *data, int usedforsecurity, return PyErr_NoMemory(); } if (string) { - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - update_512(new->state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - update_512(new->state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + update_512(new->state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); } diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 9946024fe2e848..68b239352769a1 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -70,10 +70,7 @@ class _sha3.shake_256 "SHA3object *" "&SHAKE256type" #include "_hacl/Hacl_Hash_SHA3.h" typedef struct { - PyObject_HEAD - // Prevents undefined behavior via multiple threads entering the C API. - bool use_mutex; - PyMutex mutex; + HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; } SHA3object; @@ -174,16 +171,12 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data_obj, int usedforsecurity, if (data) { GET_BUFFER_VIEW_OR_ERROR(data, &buf, goto error); - if (buf.len >= HASHLIB_GIL_MINSIZE) { - /* We do not initialize self->lock here as this is the constructor - * where it is not yet possible to have concurrent access. */ - Py_BEGIN_ALLOW_THREADS - sha3_update(self->hash_state, buf.buf, buf.len); - Py_END_ALLOW_THREADS - } - else { - sha3_update(self->hash_state, buf.buf, buf.len); - } + /* Do not use self->mutex here as this is the constructor + * where it is not yet possible to have concurrent access. */ + HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED( + buf.len, + sha3_update(self->hash_state, buf.buf, buf.len) + ); } PyBuffer_Release(&buf); @@ -249,9 +242,9 @@ _sha3_sha3_224_copy_impl(SHA3object *self) if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { return NULL; } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); newobj->hash_state = Hacl_Hash_SHA3_copy(self->hash_state); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); if (newobj->hash_state == NULL) { Py_DECREF(newobj); return PyErr_NoMemory(); @@ -273,9 +266,9 @@ _sha3_sha3_224_digest_impl(SHA3object *self) unsigned char digest[SHA3_MAX_DIGESTSIZE]; // This function errors out if the algorithm is SHAKE. Here, we know this // not to be the case, and therefore do not perform error checking. - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return PyBytes_FromStringAndSize((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -292,9 +285,9 @@ _sha3_sha3_224_hexdigest_impl(SHA3object *self) /*[clinic end generated code: output=75ad03257906918d input=2d91bb6e0d114ee3]*/ { unsigned char digest[SHA3_MAX_DIGESTSIZE]; - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_digest(self->hash_state, digest); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return _Py_strhex((const char *)digest, Hacl_Hash_SHA3_hash_len(self->hash_state)); } @@ -314,22 +307,11 @@ _sha3_sha3_224_update_impl(SHA3object *self, PyObject *data) /*[clinic end generated code: output=390b7abf7c9795a5 input=a887f54dcc4ae227]*/ { Py_buffer buf; - GET_BUFFER_VIEW_OR_ERROUT(data, &buf); - - if (!self->use_mutex && buf.len >= HASHLIB_GIL_MINSIZE) { - self->use_mutex = true; - } - if (self->use_mutex) { - Py_BEGIN_ALLOW_THREADS - PyMutex_Lock(&self->mutex); - sha3_update(self->hash_state, buf.buf, buf.len); - PyMutex_Unlock(&self->mutex); - Py_END_ALLOW_THREADS - } else { - sha3_update(self->hash_state, buf.buf, buf.len); - } - + HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED( + self, buf.len, + sha3_update(self->hash_state, buf.buf, buf.len) + ); PyBuffer_Release(&buf); Py_RETURN_NONE; } @@ -531,9 +513,9 @@ _sha3_shake_128_digest_impl(SHA3object *self, Py_ssize_t length) CHECK_HACL_UINT32_T_LENGTH(length); PyObject *digest = PyBytes_FromStringAndSize(NULL, length); uint8_t *buffer = (uint8_t *)PyBytes_AS_STRING(digest); - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); return digest; } @@ -565,9 +547,9 @@ _sha3_shake_128_hexdigest_impl(SHA3object *self, Py_ssize_t length) return PyErr_NoMemory(); } - ENTER_HASHLIB(self); + HASHLIB_ACQUIRE_LOCK(self); (void)Hacl_Hash_SHA3_squeeze(self->hash_state, buffer, (uint32_t)length); - LEAVE_HASHLIB(self); + HASHLIB_RELEASE_LOCK(self); PyObject *digest = _Py_strhex((const char *)buffer, length); PyMem_Free(buffer); return digest; From c620360f4fb7eb2e80e52e96b4c201c1df90d2d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 21:48:06 +0200 Subject: [PATCH 011/212] gh-135815: skip `netrc` security checks if `os.getuid` is missing (#135816) --- Doc/library/netrc.rst | 4 ++- Lib/netrc.py | 29 ++++++++++++------- Lib/test/test_netrc.py | 13 +++++---- ...-06-22-16-23-44.gh-issue-135815.0DandH.rst | 2 ++ 4 files changed, 30 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst diff --git a/Doc/library/netrc.rst b/Doc/library/netrc.rst index f6260383b2b057..74c97e8c9a9759 100644 --- a/Doc/library/netrc.rst +++ b/Doc/library/netrc.rst @@ -24,12 +24,14 @@ the Unix :program:`ftp` program and other FTP clients. a :exc:`FileNotFoundError` exception will be raised. Parse errors will raise :exc:`NetrcParseError` with diagnostic information including the file name, line number, and terminating token. + If no argument is specified on a POSIX system, the presence of passwords in the :file:`.netrc` file will raise a :exc:`NetrcParseError` if the file ownership or permissions are insecure (owned by a user other than the user running the process, or accessible for read or write by any other user). This implements security behavior equivalent to that of ftp and other - programs that use :file:`.netrc`. + programs that use :file:`.netrc`. Such security checks are not available + on platforms that do not support :func:`os.getuid`. .. versionchanged:: 3.4 Added the POSIX permission check. diff --git a/Lib/netrc.py b/Lib/netrc.py index b285fd8e357ddb..bd003e80a48081 100644 --- a/Lib/netrc.py +++ b/Lib/netrc.py @@ -7,6 +7,19 @@ __all__ = ["netrc", "NetrcParseError"] +def _can_security_check(): + # On WASI, getuid() is indicated as a stub but it may also be missing. + return os.name == 'posix' and hasattr(os, 'getuid') + + +def _getpwuid(uid): + try: + import pwd + return pwd.getpwuid(uid)[0] + except (ImportError, LookupError): + return f'uid {uid}' + + class NetrcParseError(Exception): """Exception raised on syntax errors in the .netrc file.""" def __init__(self, msg, filename=None, lineno=None): @@ -142,18 +155,12 @@ def _parse(self, file, fp, default_netrc): self._security_check(fp, default_netrc, self.hosts[entryname][0]) def _security_check(self, fp, default_netrc, login): - if os.name == 'posix' and default_netrc and login != "anonymous": + if _can_security_check() and default_netrc and login != "anonymous": prop = os.fstat(fp.fileno()) - if prop.st_uid != os.getuid(): - import pwd - try: - fowner = pwd.getpwuid(prop.st_uid)[0] - except KeyError: - fowner = 'uid %s' % prop.st_uid - try: - user = pwd.getpwuid(os.getuid())[0] - except KeyError: - user = 'uid %s' % os.getuid() + current_user_id = os.getuid() + if prop.st_uid != current_user_id: + fowner = _getpwuid(prop.st_uid) + user = _getpwuid(current_user_id) raise NetrcParseError( (f"~/.netrc file owner ({fowner}, {user}) does not match" " current user")) diff --git a/Lib/test/test_netrc.py b/Lib/test/test_netrc.py index 81e11a293cc4c8..9d720f627102e3 100644 --- a/Lib/test/test_netrc.py +++ b/Lib/test/test_netrc.py @@ -1,11 +1,7 @@ import netrc, os, unittest, sys, textwrap +from test import support from test.support import os_helper -try: - import pwd -except ImportError: - pwd = None - temp_filename = os_helper.TESTFN class NetrcTestCase(unittest.TestCase): @@ -269,9 +265,14 @@ def test_comment_at_end_of_machine_line_pass_has_hash(self): machine bar.domain.com login foo password pass """, '#pass') + @unittest.skipUnless(support.is_wasi, 'WASI only test') + def test_security_on_WASI(self): + self.assertFalse(netrc._can_security_check()) + self.assertEqual(netrc._getpwuid(0), 'uid 0') + self.assertEqual(netrc._getpwuid(123456), 'uid 123456') @unittest.skipUnless(os.name == 'posix', 'POSIX only test') - @unittest.skipIf(pwd is None, 'security check requires pwd module') + @unittest.skipUnless(hasattr(os, 'getuid'), "os.getuid is required") @os_helper.skip_unless_working_chmod def test_security(self): # This test is incomplete since we are normally not run as root and diff --git a/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst new file mode 100644 index 00000000000000..0f4a68bf745a8e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-16-23-44.gh-issue-135815.0DandH.rst @@ -0,0 +1,2 @@ +:mod:`netrc`: skip security checks if :func:`os.getuid` is missing. +Patch by Bénédikt Tran. From a84a62bfb40d61fb3feab9cd0b57889f89efcaf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 22 Jun 2025 22:04:38 +0200 Subject: [PATCH 012/212] gh-135532: cleanup clinic `module` directives for cryptographic modules (#135822) --- Modules/_hashopenssl.c | 17 +++++++++++------ Modules/blake2module.c | 12 ++++++++---- Modules/md5module.c | 24 ++++++++++++++++-------- Modules/sha1module.c | 25 ++++++++++++++++--------- Modules/sha2module.c | 30 +++++++++++++++++------------- Modules/sha3module.c | 33 +++++++++++++++++++++------------ 6 files changed, 89 insertions(+), 52 deletions(-) diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c index 0fe69ee8fac835..90a7391ebb09af 100644 --- a/Modules/_hashopenssl.c +++ b/Modules/_hashopenssl.c @@ -255,7 +255,8 @@ py_hashentry_table_new(void) { return NULL; } -/* Module state */ +// --- Module state ----------------------------------------------------------- + static PyModuleDef _hashlibmodule; typedef struct { @@ -277,6 +278,8 @@ get_hashlib_state(PyObject *module) return (_hashlibstate *)state; } +// --- Module objects --------------------------------------------------------- + typedef struct { HASHLIB_OBJECT_HEAD EVP_MD_CTX *ctx; /* OpenSSL message digest context */ @@ -291,15 +294,17 @@ typedef struct { #define HMACobject_CAST(op) ((HMACobject *)(op)) -#include "clinic/_hashopenssl.c.h" +// --- Module clinic configuration -------------------------------------------- + /*[clinic input] module _hashlib -class _hashlib.HASH "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASH_type" -class _hashlib.HASHXOF "HASHobject *" "((_hashlibstate *)PyModule_GetState(module))->HASHXOF_type" -class _hashlib.HMAC "HMACobject *" "((_hashlibstate *)PyModule_GetState(module))->HMAC_type" +class _hashlib.HASH "HASHobject *" "&PyType_Type" +class _hashlib.HASHXOF "HASHobject *" "&PyType_Type" +class _hashlib.HMAC "HMACobject *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=eb805ce4b90b1b31]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6b5c9ce5c28bdc58]*/ +#include "clinic/_hashopenssl.c.h" /* LCOV_EXCL_START */ diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 295bca07650916..6c4349ac06bb8a 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -2,6 +2,7 @@ * Written in 2013 by Dmitry Chestnykh * Modified for CPython by Christian Heimes * Updated to use HACL* by Jonathan Protzenko + * Additional work by Bénédikt Tran <10796600+picnixz@users.noreply.github.com> * * To the extent possible under law, the author have dedicated all * copyright and related and neighboring rights to this software to @@ -368,15 +369,18 @@ typedef struct { #define _Blake2Object_CAST(op) ((Blake2Object *)(op)) -#include "clinic/blake2module.c.h" +// --- Module clinic configuration -------------------------------------------- /*[clinic input] module _blake2 -class _blake2.blake2b "Blake2Object *" "&PyBlake2_BLAKE2bType" -class _blake2.blake2s "Blake2Object *" "&PyBlake2_BLAKE2sType" +class _blake2.blake2b "Blake2Object *" "&PyType_Type" +class _blake2.blake2s "Blake2Object *" "&PyType_Type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b7526666bd18af83]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=86b0972b0c41b3d0]*/ + +#include "clinic/blake2module.c.h" +// --- BLAKE-2 object interface ----------------------------------------------- static Blake2Object * new_Blake2Object(PyTypeObject *type) diff --git a/Modules/md5module.c b/Modules/md5module.c index 5dac3a91f4f09d..8b6dd4a8195dfb 100644 --- a/Modules/md5module.c +++ b/Modules/md5module.c @@ -8,6 +8,7 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. @@ -25,18 +26,14 @@ #include "hashlib.h" -/*[clinic input] -module _md5 -class MD5Type "MD5object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ +#include "_hacl/Hacl_Hash_MD5.h" /* The MD5 block size and message digest sizes, in bytes */ #define MD5_BLOCKSIZE 64 #define MD5_DIGESTSIZE 16 -#include "_hacl/Hacl_Hash_MD5.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -45,8 +42,7 @@ typedef struct { #define _MD5object_CAST(op) ((MD5object *)(op)) -#include "clinic/md5module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* md5_type; @@ -60,6 +56,18 @@ md5_get_state(PyObject *module) return (MD5State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _md5 +class MD5Type "MD5object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/ + +#include "clinic/md5module.c.h" + +// --- MD5 object interface --------------------------------------------------- + static MD5object * newMD5object(MD5State * st) { diff --git a/Modules/sha1module.c b/Modules/sha1module.c index 3bc83b674f1bc1..faa9dcccc5755b 100644 --- a/Modules/sha1module.c +++ b/Modules/sha1module.c @@ -8,13 +8,13 @@ Andrew Kuchling (amk@amk.ca) Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. */ -/* SHA1 objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif @@ -24,18 +24,14 @@ #include "pycore_strhex.h" // _Py_strhex() #include "pycore_typeobject.h" // _PyType_GetModuleState() -/*[clinic input] -module _sha1 -class SHA1Type "SHA1object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ +#include "_hacl/Hacl_Hash_SHA1.h" /* The SHA1 block size and message digest sizes, in bytes */ #define SHA1_BLOCKSIZE 64 #define SHA1_DIGESTSIZE 20 -#include "_hacl/Hacl_Hash_SHA1.h" +// --- Module objects --------------------------------------------------------- typedef struct { HASHLIB_OBJECT_HEAD @@ -44,8 +40,7 @@ typedef struct { #define _SHA1object_CAST(op) ((SHA1object *)(op)) -#include "clinic/sha1module.c.h" - +// --- Module state ----------------------------------------------------------- typedef struct { PyTypeObject* sha1_type; @@ -59,6 +54,18 @@ sha1_get_state(PyObject *module) return (SHA1State *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha1 +class SHA1Type "SHA1object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3dc9a20d1becb759]*/ + +#include "clinic/sha1module.c.h" + +// --- SHA-1 object interface configuration ----------------------------------- + static SHA1object * newSHA1object(SHA1State *st) { diff --git a/Modules/sha2module.c b/Modules/sha2module.c index 6643b7e6b02654..36300ba899fd44 100644 --- a/Modules/sha2module.c +++ b/Modules/sha2module.c @@ -9,32 +9,25 @@ Greg Stein (gstein@lyra.org) Trevor Perrin (trevp@trevp.net) Jonathan Protzenko (jonathan@protzenko.fr) + Bénédikt Tran (10796600+picnixz@users.noreply.github.com) Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org) Licensed to PSF under a Contributor Agreement. */ -/* SHA objects */ #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif #include "Python.h" -#include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "pycore_strhex.h" // _Py_strhex() #include "hashlib.h" -/*[clinic input] -module _sha2 -class SHA256Type "SHA256object *" "&PyType_Type" -class SHA512Type "SHA512object *" "&PyType_Type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ - +#include "_hacl/Hacl_Hash_SHA2.h" /* The SHA block sizes and maximum message digest sizes, in bytes */ @@ -43,9 +36,7 @@ class SHA512Type "SHA512object *" "&PyType_Type" #define SHA512_BLOCKSIZE 128 #define SHA512_DIGESTSIZE 64 -/* Our SHA2 implementations defer to the HACL* verified library. */ - -#include "_hacl/Hacl_Hash_SHA2.h" +// --- Module objects --------------------------------------------------------- // TODO: Get rid of int digestsize in favor of Hacl state info? @@ -64,7 +55,7 @@ typedef struct { #define _SHA256object_CAST(op) ((SHA256object *)(op)) #define _SHA512object_CAST(op) ((SHA512object *)(op)) -#include "clinic/sha2module.c.h" +// --- Module state ----------------------------------------------------------- /* We shall use run-time type information in the remainder of this module to * tell apart SHA2-224 and SHA2-256 */ @@ -83,6 +74,19 @@ sha2_get_state(PyObject *module) return (sha2_state *)state; } +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha2 +class SHA256Type "SHA256object *" "&PyType_Type" +class SHA512Type "SHA512object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b5315a7b611c9afc]*/ + +#include "clinic/sha2module.c.h" + +// --- SHA-2 object interface ------------------------------------------------- + static int SHA256copy(SHA256object *src, SHA256object *dest) { diff --git a/Modules/sha3module.c b/Modules/sha3module.c index 68b239352769a1..face90a6094beb 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -9,6 +9,7 @@ * Greg Stein (gstein@lyra.org) * Trevor Perrin (trevp@trevp.net) * Gregory P. Smith (greg@krypto.org) + * Bénédikt Tran (10796600+picnixz@users.noreply.github.com) * * Copyright (C) 2012-2022 Christian Heimes (christian@python.org) * Licensed to PSF under a Contributor Agreement. @@ -24,6 +25,8 @@ #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "hashlib.h" +#include "_hacl/Hacl_Hash_SHA3.h" + /* * Assert that 'LEN' can be safely casted to uint32_t. * @@ -37,6 +40,8 @@ #define SHA3_MAX_DIGESTSIZE 64 /* 64 Bytes (512 Bits) for 224 to 512 */ +// --- Module state ----------------------------------------------------------- + typedef struct { PyTypeObject *sha3_224_type; PyTypeObject *sha3_256_type; @@ -54,21 +59,10 @@ sha3_get_state(PyObject *module) return (SHA3State *)state; } -/*[clinic input] -module _sha3 -class _sha3.sha3_224 "SHA3object *" "&SHA3_224typ" -class _sha3.sha3_256 "SHA3object *" "&SHA3_256typ" -class _sha3.sha3_384 "SHA3object *" "&SHA3_384typ" -class _sha3.sha3_512 "SHA3object *" "&SHA3_512typ" -class _sha3.shake_128 "SHA3object *" "&SHAKE128type" -class _sha3.shake_256 "SHA3object *" "&SHAKE256type" -[clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=b8a53680f370285a]*/ +// --- Module objects --------------------------------------------------------- /* The structure for storing SHA3 info */ -#include "_hacl/Hacl_Hash_SHA3.h" - typedef struct { HASHLIB_OBJECT_HEAD Hacl_Hash_SHA3_state_t *hash_state; @@ -76,8 +70,23 @@ typedef struct { #define _SHA3object_CAST(op) ((SHA3object *)(op)) +// --- Module clinic configuration -------------------------------------------- + +/*[clinic input] +module _sha3 +class _sha3.sha3_224 "SHA3object *" "&PyType_Type" +class _sha3.sha3_256 "SHA3object *" "&PyType_Type" +class _sha3.sha3_384 "SHA3object *" "&PyType_Type" +class _sha3.sha3_512 "SHA3object *" "&PyType_Type" +class _sha3.shake_128 "SHA3object *" "&PyType_Type" +class _sha3.shake_256 "SHA3object *" "&PyType_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ccd22550c7fb99bf]*/ + #include "clinic/sha3module.c.h" +// --- SHA-3 object interface ------------------------------------------------- + static SHA3object * newSHA3object(PyTypeObject *type) { From 5b12e0b66e03e244b2b4b969842d62beb0981ff7 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 23 Jun 2025 11:52:28 +0530 Subject: [PATCH 013/212] add async generators section to asyncio internal docs (#135674) --- InternalDocs/asyncio.md | 126 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/InternalDocs/asyncio.md b/InternalDocs/asyncio.md index b60fe70478a6bc..22159852ca54db 100644 --- a/InternalDocs/asyncio.md +++ b/InternalDocs/asyncio.md @@ -2,10 +2,12 @@ asyncio ======= -This document describes the working and implementation details of C -implementation of the +This document describes the working and implementation details of the [`asyncio`](https://docs.python.org/3/library/asyncio.html) module. +**The following section describes the implementation details of the C implementation**. + +# Task management ## Pre-Python 3.14 implementation @@ -158,7 +160,8 @@ flowchart TD subgraph two["Thread deallocating"] A1{"thread's task list empty?
llist_empty(tstate->asyncio_tasks_head)"} A1 --> |true| B1["deallocate thread
free_threadstate(tstate)"] - A1 --> |false| C1["add tasks to interpreter's task list
llist_concat(&tstate->interp->asyncio_tasks_head,tstate->asyncio_tasks_head)"] + A1 --> |false| C1["add tasks to interpreter's task list
llist_concat(&tstate->interp->asyncio_tasks_head, + &tstate->asyncio_tasks_head)"] C1 --> B1 end @@ -205,6 +208,121 @@ In free-threading, it avoids contention on a global dictionary as threads can access the current task of thier running loop without any locking. +--- + +**The following section describes the implementation details of the Python implementation**. + +# async generators + +This section describes the implementation details of async generators in `asyncio`. + +Since async generators are meant to be used from coroutines, +their finalization (execution of finally blocks) needs +to be done while the loop is running. +Most async generators are closed automatically +when they are fully iterated over and exhausted; however, +if the async generator is not fully iterated over, +it may not be closed properly, leading to the `finally` blocks not being executed. + +Consider the following code: +```py +import asyncio + +async def agen(): + try: + yield 1 + finally: + await asyncio.sleep(1) + print("finally executed") + + +async def main(): + async for i in agen(): + break + +loop = asyncio.EventLoop() +loop.run_until_complete(main()) +``` + +The above code will not print "finally executed", because the +async generator `agen` is not fully iterated over +and it is not closed manually by awaiting `agen.aclose()`. + +To solve this, `asyncio` uses the `sys.set_asyncgen_hooks` function to +set hooks for finalizing async generators as described in +[PEP 525](https://peps.python.org/pep-0525/). + +- **firstiter hook**: When the async generator is iterated over for the first time, +the *firstiter hook* is called. The async generator is added to `loop._asyncgens` WeakSet +and the event loop tracks all active async generators. + +- **finalizer hook**: When the async generator is about to be finalized, +the *finalizer hook* is called. The event loop removes the async generator +from `loop._asyncgens` WeakSet, and schedules the finalization of the async +generator by creating a task calling `agen.aclose()`. This ensures that the +finally block is executed while the event loop is running. When the loop is +shutting down, the loop checks if there are active async generators and if so, +it similarly schedules the finalization of all active async generators by calling +`agen.aclose()` on each of them and waits for them to complete before shutting +down the loop. + +This ensures that the async generator's `finally` blocks are executed even +if the generator is not explicitly closed. + +Consider the following example: + +```python +import asyncio + +async def agen(): + try: + yield 1 + yield 2 + finally: + print("executing finally block") + +async def main(): + async for item in agen(): + print(item) + break # not fully iterated + +asyncio.run(main()) +``` + +```mermaid +flowchart TD + subgraph one["Loop running"] + A["asyncio.run(main())"] --> B + B["set async generator hooks
sys.set_asyncgen_hooks()"] --> C + C["async for item in agen"] --> F + F{"first iteration?"} --> |true|D + F{"first iteration?"} --> |false|H + D["calls firstiter hook
loop._asyncgen_firstiter_hook(agen)"] --> E + E["add agen to WeakSet
loop._asyncgens.add(agen)"] --> H + H["item = await agen.\_\_anext\_\_()"] --> J + J{"StopAsyncIteration?"} --> |true|M + J{"StopAsyncIteration?"} --> |false|I + I["print(item)"] --> S + S{"continue iterating?"} --> |true|C + S{"continue iterating?"} --> |false|M + M{"agen is no longer referenced?"} --> |true|N + M{"agen is no longer referenced?"} --> |false|two + N["finalize agen
_PyGen_Finalize(agen)"] --> O + O["calls finalizer hook
loop._asyncgen_finalizer_hook(agen)"] --> P + P["remove agen from WeakSet
loop._asyncgens.discard(agen)"] --> Q + Q["schedule task to close it
self.create_task(agen.aclose())"] --> R + R["print('executing finally block')"] --> E1 + + end + + subgraph two["Loop shutting down"] + A1{"check for alive async generators?"} --> |true|B1 + B1["close all async generators
await asyncio.gather\(*\[ag.aclose\(\) for ag in loop._asyncgens\]"] --> R + A1{"check for alive async generators?"} --> |false|E1 + E1["loop.close()"] + end + +``` [^1]: https://github.com/python/cpython/issues/123089 -[^2]: https://github.com/python/cpython/issues/80788 \ No newline at end of file +[^2]: https://github.com/python/cpython/issues/80788 From 0cc0aeed497a47cb5b392294dafcb3c8c5119a7b Mon Sep 17 00:00:00 2001 From: Emma Smith Date: Mon, 23 Jun 2025 06:28:05 -0400 Subject: [PATCH 014/212] gh-134986: Catch PermissionError when trying to call perf in tests (#134987) Using Ubuntu 24.04 on the Windows Subsystem for Linux, perf will raise a `PermissionError` instead of `FileNotFoundError`. This commit modifies the tests to catch that. --- Lib/test/test_perf_profiler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index 21d097dbb559ec..7529c853f9c152 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -506,9 +506,12 @@ def _is_perf_version_at_least(major, minor): # The output of perf --version looks like "perf version 6.7-3" but # it can also be perf version "perf version 5.15.143", or even include # a commit hash in the version string, like "6.12.9.g242e6068fd5c" + # + # PermissionError is raised if perf does not exist on the Windows Subsystem + # for Linux, see #134987 try: output = subprocess.check_output(["perf", "--version"], text=True) - except (subprocess.CalledProcessError, FileNotFoundError): + except (subprocess.CalledProcessError, FileNotFoundError, PermissionError): return False version = output.split()[2] version = version.split("-")[0] From 07bb42813956dc6e3d3a0ed773f112718c98f162 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 23 Jun 2025 12:35:59 +0200 Subject: [PATCH 015/212] gh-89488: Add warning about `Py_BuildValue("p")` needing exact `int` (GH-135610) --- Doc/c-api/arg.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 49dbc8d71cce62..ab9f9c4539ae9a 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -686,6 +686,12 @@ Building values ``p`` (:class:`bool`) [int] Convert a C :c:expr:`int` to a Python :class:`bool` object. + Be aware that this format requires an ``int`` argument. + Unlike most other contexts in C, variadic arguments are not coerced to + a suitable type automatically. + You can convert another type (for example, a pointer or a float) to a + suitable ``int`` value using ``(x) ? 1 : 0`` or ``!!x``. + .. versionadded:: 3.14 ``c`` (:class:`bytes` of length 1) [char] From f249c82db93f506ee292807b6969be1f15aeea6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 23 Jun 2025 12:49:27 +0200 Subject: [PATCH 016/212] gh-135823: improve error message in `netrc` security checks (#135827) --- Lib/netrc.py | 4 ++-- .../Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst diff --git a/Lib/netrc.py b/Lib/netrc.py index bd003e80a48081..2f502c1d53364f 100644 --- a/Lib/netrc.py +++ b/Lib/netrc.py @@ -162,8 +162,8 @@ def _security_check(self, fp, default_netrc, login): fowner = _getpwuid(prop.st_uid) user = _getpwuid(current_user_id) raise NetrcParseError( - (f"~/.netrc file owner ({fowner}, {user}) does not match" - " current user")) + f"~/.netrc file owner ({fowner}) does not match" + f" current user ({user})") if (prop.st_mode & (stat.S_IRWXG | stat.S_IRWXO)): raise NetrcParseError( "~/.netrc access too permissive: access" diff --git a/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst new file mode 100644 index 00000000000000..5b9d89caae7a55 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-22-22-03-06.gh-issue-135823.iDBg97.rst @@ -0,0 +1,3 @@ +:mod:`netrc`: improve the error message when the security check for the +ownership of the default configuration file ``~/.netrc`` fails. Patch by +Bénédikt Tran. From bf25413f88e986eaf4b434231ed1ab511b236e8d Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 23 Jun 2025 15:29:30 +0300 Subject: [PATCH 017/212] Bump mypy to 1.16.1 (#135720) --- Lib/_pyrepl/base_eventqueue.py | 6 +++--- Tools/cases_generator/tier2_generator.py | 2 +- Tools/requirements-dev.txt | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index 842599bd1877fb..0589a0f437ec7c 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -87,7 +87,7 @@ def push(self, char: int | bytes) -> None: if isinstance(k, dict): self.keymap = k else: - self.insert(Event('key', k, self.flush_buf())) + self.insert(Event('key', k, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap elif self.buf and self.buf[0] == 27: # escape @@ -96,7 +96,7 @@ def push(self, char: int | bytes) -> None: # the docstring in keymap.py trace('unrecognized escape sequence, propagating...') self.keymap = self.compiled_keymap - self.insert(Event('key', '\033', bytearray(b'\033'))) + self.insert(Event('key', '\033', b'\033')) for _c in self.flush_buf()[1:]: self.push(_c) @@ -106,5 +106,5 @@ def push(self, char: int | bytes) -> None: except UnicodeError: return else: - self.insert(Event('key', decoded, self.flush_buf())) + self.insert(Event('key', decoded, bytes(self.flush_buf()))) self.keymap = self.compiled_keymap diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 276f306dfffa18..fc3bc47286f7f6 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -91,7 +91,7 @@ def deopt_if( self.emit("}\n") return not always_true(first_tkn) - def exit_if( # type: ignore[override] + def exit_if( self, tkn: Token, tkn_iter: TokenIterator, diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 5bf180bb30a310..0beaab2d3e7157 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -1,7 +1,7 @@ # Requirements file for external linters and checks we run on # Tools/clinic, Tools/cases_generator/, and Tools/peg_generator/ in CI -mypy==1.15 +mypy==1.16.1 # needed for peg_generator: -types-psutil==6.0.0.20240901 -types-setuptools==74.0.0.20240831 +types-psutil==7.0.0.20250601 +types-setuptools==80.9.0.20250529 From 7b36de15d45d7bbc28a1eaf71797db77343cd2eb Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 23 Jun 2025 21:38:57 +0530 Subject: [PATCH 018/212] GH-124878: reenable `test_finalize_daemon_thread_hang` test under TSAN (#135793) --- Lib/test/test_threading.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 125c27446986c0..13b55d0f0a2e73 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1284,12 +1284,6 @@ def f(): @cpython_only def test_finalize_daemon_thread_hang(self): - if support.check_sanitizer(thread=True, memory=True): - # the thread running `time.sleep(100)` below will still be alive - # at process exit - self.skipTest( - "https://github.com/python/cpython/issues/124878 - Known" - " race condition that TSAN identifies.") # gh-87135: tests that daemon threads hang during finalization script = textwrap.dedent(''' import os From a126c1a0018d227e8f3fecac0b10651cb12f5244 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Tue, 24 Jun 2025 00:57:14 +0800 Subject: [PATCH 019/212] gh-134584: Specialize POP_TOP by reference and type in JIT (GH-135761) --- Include/internal/pycore_uop_ids.h | 90 ++++++++++--------- Include/internal/pycore_uop_metadata.h | 16 ++++ Lib/test/test_capi/test_opt.py | 40 +++++++++ ...-06-20-14-50-44.gh-issue-134584.3CJdAI.rst | 1 + Python/bytecodes.c | 21 +++++ Python/executor_cases.c.h | 40 +++++++++ Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 28 +++++- Python/optimizer_cases.c.h | 47 +++++++++- Python/optimizer_symbols.c | 3 - Tools/cases_generator/analyzer.py | 1 + .../opcode_metadata_generator.py | 12 +-- 12 files changed, 239 insertions(+), 62 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e19fb..a9432401525ebb 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define _STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 +#define _STORE_FAST 524 +#define _STORE_FAST_0 525 +#define _STORE_FAST_1 526 +#define _STORE_FAST_2 527 +#define _STORE_FAST_3 528 +#define _STORE_FAST_4 529 +#define _STORE_FAST_5 530 +#define _STORE_FAST_6 531 +#define _STORE_FAST_7 532 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define _STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 533 +#define _STORE_SUBSCR 534 +#define _STORE_SUBSCR_DICT 535 +#define _STORE_SUBSCR_LIST_INT 536 +#define _SWAP 537 +#define _SWAP_2 538 +#define _SWAP_3 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785817..52cbc2fffe484e 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -64,6 +64,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = 0, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case _POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 84e864b44b9544..0a85b19e06f158 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2392,6 +2392,46 @@ def testfunc(n): assert ex is not None """)) + def test_pop_top_specialize_none(self): + def testfunc(n): + for _ in range(n): + global_identity(None) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def test_pop_top_specialize_int(self): + def testfunc(n): + for _ in range(n): + global_identity(100000) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_INT", uops) + + def test_pop_top_specialize_float(self): + def testfunc(n): + for _ in range(n): + global_identity(1e6) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_FLOAT", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst new file mode 100644 index 00000000000000..715ac7dc9253d5 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst @@ -0,0 +1 @@ +Specialize :opcode:`POP_TOP` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38ccfcc..535e552e047475 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -344,6 +344,27 @@ dummy_func( PyStackRef_XCLOSE(value); } + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550afe..46fc164a5b3bc2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -539,6 +539,46 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8b1a63e3d2916f..145a8c118d3612 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 3f2e2e0351e052..f8fbaf232ffa2e 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -534,7 +534,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { @@ -542,7 +542,7 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { @@ -561,6 +561,24 @@ dummy_func(void) { value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -803,7 +821,9 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 91927180b3509d..1e581afadc9569 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -100,6 +100,47 @@ } case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -784,7 +825,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2660,7 +2701,7 @@ case _LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2670,7 +2711,7 @@ case _POP_TOP_LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 64cc1b9074fcf0..c3d9e0e778bf55 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6ff0223d2ef3e7..6466d2615cd14e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc599..0bcdc5395dcd8e 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: From aed1ed0f281366ea8bad224a7e4534dfbd312b0e Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Mon, 23 Jun 2025 22:42:09 +0300 Subject: [PATCH 020/212] gh-131798: Optimize `_UNARY_NEGATIVE` (GH-135223) --- Lib/test/test_capi/test_opt.py | 18 ++++++++++++++++++ ...5-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst | 1 + Python/optimizer_bytecodes.c | 8 +++++++- Python/optimizer_cases.c.h | 8 +++++++- 4 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 0a85b19e06f158..e4c9a463855a69 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2432,6 +2432,24 @@ def testfunc(n): self.assertIn("_POP_TOP_FLOAT", uops) + def test_unary_negative_long_float_type(self): + def testfunc(n): + for _ in range(n): + a = 9397 + f = 9397.0 + x = -a + -a + y = -f + -f + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertNotIn("_GUARD_TOS_INT", uops) + self.assertNotIn("_GUARD_NOS_INT", uops) + self.assertNotIn("_GUARD_TOS_FLOAT", uops) + self.assertNotIn("_GUARD_NOS_FLOAT", uops) def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst new file mode 100644 index 00000000000000..6a9d9c683f9a8c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-19-17-22.gh-issue-131798.XoV8Eb.rst @@ -0,0 +1 @@ +Optimize ``_UNARY_NEGATIVE`` in JIT-compiled code. diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8fbaf232ffa2e..f8a0484bdc2b04 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -452,7 +452,13 @@ dummy_func(void) { res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 1e581afadc9569..10767ccdbd57f5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -190,7 +190,13 @@ res = sym_new_compact_int(ctx); } else { - res = sym_new_not_null(ctx); + PyTypeObject *type = sym_get_type(value); + if (type == &PyLong_Type || type == &PyFloat_Type) { + res = sym_new_type(ctx, type); + } + else { + res = sym_new_not_null(ctx); + } } stack_pointer[-1] = res; break; From 88222ebff213cb7c02c3b309f162992fffdd9e04 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 23 Jun 2025 15:09:40 -0700 Subject: [PATCH 021/212] gh-119786: Add InternalDocs/qsbr.md. (gh-135411) Add internal doc for the Quiescent-State Based Reclamation (QSBR) implementation. --- InternalDocs/README.md | 3 +- InternalDocs/qsbr.md | 129 +++++++++++++++++++++++++++++++++++++++++ Python/qsbr.c | 2 +- 3 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 InternalDocs/qsbr.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index c20aa015c5bb74..6b1d919826402c 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -42,8 +42,9 @@ Program Execution - [Exception Handling](exception_handling.md) +- [Quiescent-State Based Reclamation (QSBR)](qsbr.md) Modules --- -- [asyncio](asyncio.md) \ No newline at end of file +- [asyncio](asyncio.md) diff --git a/InternalDocs/qsbr.md b/InternalDocs/qsbr.md new file mode 100644 index 00000000000000..1c4a79a7b44436 --- /dev/null +++ b/InternalDocs/qsbr.md @@ -0,0 +1,129 @@ +# Quiescent-State Based Reclamation (QSBR) + +## Introduction + +When implementing lock-free data structures, a key challenge is determining +when it is safe to free memory that has been logically removed from a +structure. Freeing memory too early can lead to use-after-free bugs if another +thread is still accessing it. Freeing it too late results in excessive memory +consumption. + +Safe memory reclamation (SMR) schemes address this by delaying the free +operation until all concurrent read accesses are guaranteed to have completed. +Quiescent-State Based Reclamation (QSBR) is a SMR scheme used in Python's +free-threaded build to manage the lifecycle of shared memory. + +QSBR requires threads to periodically report that they are in a quiescent +state. A thread is in a quiescent state if it holds no references to shared +objects that might be reclaimed. Think of it as a checkpoint where a thread +signals, "I am not in the middle of any operation that relies on a shared +resource." In Python, the eval_breaker provides a natural and convenient place +for threads to report this state. + + +## Use in Free-Threaded Python + +While CPython's memory management is dominated by reference counting and a +tracing garbage collector, these mechanisms are not suitable for all data +structures. For example, the backing array of a list object is not individually +reference-counted but may have a shorter lifetime than the `PyListObject` that +contains it. We could delay reclamation until the next GC run, but we want +reclamation to be prompt and to run the GC less frequently in the free-threaded +build, as it requires pausing all threads. + +Many operations in the free-threaded build are protected by locks. However, for +performance-critical code, we want to allow reads to happen concurrently with +updates. For instance, we want to avoid locking during most list read accesses. +If a list is resized while another thread is reading it, QSBR provides the +mechanism to determine when it is safe to free the list's old backing array. + +Specific use cases for QSBR include: + +* Dictionary keys (`PyDictKeysObject`) and list arrays (`_PyListArray`): When a +dictionary or list that may be shared between threads is resized, we use QSBR +to delay freeing the old keys or array until it's safe. For dicts and lists +that are not shared, their storage can be freed immediately upon resize. + +* Mimalloc `mi_page_t`: Non-locking dictionary and list accesses require +cooperation from the memory allocator. If an object is freed and its memory is +reused, we must ensure the new object's reference count field is at the same +memory location. In practice, this means when a mimalloc page (`mi_page_t`) +becomes empty, we don't immediately allow it to be reused for allocations of a +different size class. QSBR is used to determine when it's safe to repurpose the +page or return its memory to the OS. + + +## Implementation Details + + +### Core Implementation + +The proposal to add QSBR to Python is contained in +[Github issue 115103](https://github.com/python/cpython/issues/115103). +Many details of that proposal have been copied here, so they can be kept +up-to-date with the actual implementation. + +Python's QSBR implementation is based on FreeBSD's "Global Unbounded +Sequences." [^1][^2][^3]. It relies on a few key counters: + +* Global Write Sequence (`wr_seq`): A per-interpreter counter, `wr_seq`, is started +at 1 and incremented by 2 each time it is advanced. This ensures its value is +always odd, which can be used to distinguish it from other state values. When +an object needs to be reclaimed, `wr_seq` is advanced, and the object is tagged +with this new sequence number. + +* Per-Thread Read Sequence: Each thread has a local read sequence counter. When +a thread reaches a quiescent state (e.g., at the eval_breaker), it copies the +current global `wr_seq` to its local counter. + +* Global Read Sequence (`rd_seq`): This per-interpreter value stores the minimum +of all per-thread read sequence counters (excluding detached threads). It is +updated by a "polling" operation. + +To free an object, the following steps are taken: + +1. Advance the global `wr_seq`. + +2. Add the object's pointer to a deferred-free list, tagging it with the new + `wr_seq` value as its qsbr_goal. + +Periodically, a polling mechanism processes this deferred-free list: + +1. The minimum read sequence value across all active threads is calculated and + stored as the global `rd_seq`. + +2. For each item on the deferred-free list, if its qsbr_goal is less than or + equal to the new `rd_seq`, its memory is freed, and it is removed from the: + list. Otherwise, it remains on the list for a future attempt. + + +### Deferred Advance Optimization + +To reduce memory contention from frequent updates to the global `wr_seq`, its +advancement is sometimes deferred. Instead of incrementing `wr_seq` on every +reclamation request, each thread tracks its number of deferrals locally. Once +the deferral count reaches a limit (QSBR_DEFERRED_LIMIT, currently 10), the +thread advances the global `wr_seq` and resets its local count. + +When an object is added to the deferred-free list, its qsbr_goal is set to +`wr_seq` + 2. By setting the goal to the next sequence value, we ensure it's safe +to defer the global counter advancement. This optimization improves runtime +speed but may increase peak memory usage by slightly delaying when memory can +be reclaimed. + + +## Limitations + +Determining the `rd_seq` requires scanning over all thread states. This operation +could become a bottleneck in applications with a very large number of threads +(e.g., >1,000). Future work may address this with more advanced mechanisms, +such as a tree-based structure or incremental scanning. For now, the +implementation prioritizes simplicity, with plans for refinement if +multi-threaded benchmarks reveal performance issues. + + +## References + +[^1]: https://youtu.be/ZXUIFj4nRjk?t=694 +[^2]: https://people.kernel.org/joelfernandes/gus-vs-rcu +[^3]: http://bxr.su/FreeBSD/sys/kern/subr_smr.c#44 diff --git a/Python/qsbr.c b/Python/qsbr.c index bf34fb2523dfc8..afa03776c26b6f 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -1,6 +1,6 @@ /* * Implementation of safe memory reclamation scheme using - * quiescent states. + * quiescent states. See InternalDocs/qsbr.md. * * This is derived from the "GUS" safe memory reclamation technique * in FreeBSD written by Jeffrey Roberson. It is heavily modified. Any bugs From c87b55b054035f8f81e1ffd0bc4a6d34e0e758ee Mon Sep 17 00:00:00 2001 From: Yongzi Li <204532581+Yzi-Li@users.noreply.github.com> Date: Tue, 24 Jun 2025 06:53:33 +0800 Subject: [PATCH 022/212] Docs: Use `arguments` to replace `args` in `argparse.rst` (GH-135510) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 17f126cc065a82..a03d88092dbf1f 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -955,7 +955,7 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: .. index:: single: + (plus); in argparse module -* ``'+'``. Just like ``'*'``, all command-line args present are gathered into a +* ``'+'``. Just like ``'*'``, all command-line arguments present are gathered into a list. Additionally, an error message will be generated if there wasn't at least one command-line argument present. For example:: From 62d59e37dd930b65cf0688d917ba3e7eb4a6e9c3 Mon Sep 17 00:00:00 2001 From: Vincent Poulailleau Date: Tue, 24 Jun 2025 01:36:30 +0200 Subject: [PATCH 023/212] Fix example according to PEP 750 in "What's new in 3.14" (GH-134727) A redundant extra part was written. Added a closing tag, to match the usage in PEP 750. --- Doc/whatsnew/3.14.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f0a87a9ada7bab..8b20e42d7d8e07 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -278,7 +278,7 @@ As another example, generating HTML attributes from data: attributes = {"src": "shrubbery.jpg", "alt": "looks nice"} template = t"" - assert html(template) == 'looks nice' + assert html(template) == 'looks nice' Compared to using an f-string, the ``html`` function has access to template attributes containing the original information: static strings, interpolations, and values From f4bf6ab0d8af4cfab2333aa96504f7d8b1ce93dc Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Tue, 24 Jun 2025 08:55:50 +0800 Subject: [PATCH 024/212] gh-135648: Document that `shutil.copyfileobj` doesn't flush (#135737) Adds a note about flush/close on copyfileobj, and updates the Emscripten build script to follow documented advice. --- Doc/library/shutil.rst | 7 +++++++ Tools/wasm/emscripten/__main__.py | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index e7c4c4f46bd011..2dde40c9d92f45 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -47,6 +47,13 @@ Directory and files operations 0, only the contents from the current file position to the end of the file will be copied. + :func:`copyfileobj` will *not* guarantee that the destination stream has + been flushed on completion of the copy. If you want to read from the + destination at the completion of the copy operation (for example, reading + the contents of a temporary file that has been copied from a HTTP stream), + you must ensure that you have called :func:`~io.IOBase.flush` or + :func:`~io.IOBase.close` on the file-like object before attempting to read + the destination file. .. function:: copyfile(src, dst, *, follow_symlinks=True) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 849bd5de44eb7b..c0d58aeaadd2cf 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -167,11 +167,12 @@ def make_build_python(context, working_dir): @subdir(HOST_BUILD_DIR, clean_ok=True) def make_emscripten_libffi(context, working_dir): shutil.rmtree(working_dir / "libffi-3.4.6", ignore_errors=True) - with tempfile.NamedTemporaryFile(suffix=".tar.gz") as tmp_file: + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete_on_close=False) as tmp_file: with urlopen( "https://github.com/libffi/libffi/releases/download/v3.4.6/libffi-3.4.6.tar.gz" ) as response: shutil.copyfileobj(response, tmp_file) + tmp_file.close() shutil.unpack_archive(tmp_file.name, working_dir) call( [EMSCRIPTEN_DIR / "make_libffi.sh"], From e856bb9c2c83022f8205bc826d717049eeed41b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:58:07 +0200 Subject: [PATCH 025/212] gh-135532: use `defining_class` for copying BLAKE-2 and SHA-3 objects (#135838) --- Lib/test/test_hashlib.py | 10 ++++++++++ Modules/blake2module.c | 8 +++++--- Modules/clinic/blake2module.c.h | 14 +++++++++----- Modules/clinic/sha3module.c.h | 14 +++++++++----- Modules/sha3module.c | 9 +++++---- 5 files changed, 38 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 6e4e41e4fbb4f1..7b378c45e71563 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -371,6 +371,16 @@ def test_get_builtin_constructor(self): self.assertIs(constructor, _md5.md5) self.assertEqual(sorted(builtin_constructor_cache), ['MD5', 'md5']) + def test_copy(self): + for cons in self.hash_constructors: + h1 = cons(os.urandom(16), usedforsecurity=False) + h2 = h1.copy() + self.assertIs(type(h1), type(h2)) + self.assertEqual(h1.name, h2.name) + size = (16,) if h1.name in self.shakes else () + self.assertEqual(h1.digest(*size), h2.digest(*size)) + self.assertEqual(h1.hexdigest(*size), h2.hexdigest(*size)) + def test_hexdigest(self): for cons in self.hash_constructors: h = cons(usedforsecurity=False) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 6c4349ac06bb8a..9e279e11b518d2 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -787,17 +787,19 @@ blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) /*[clinic input] _blake2.blake2b.copy + cls: defining_class + Return a copy of the hash object. [clinic start generated code]*/ static PyObject * -_blake2_blake2b_copy_impl(Blake2Object *self) -/*[clinic end generated code: output=622d1c56b91c50d8 input=e383c2d199fd8a2e]*/ +_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls) +/*[clinic end generated code: output=5f8ea31c56c52287 input=f38f3475e9aec98d]*/ { int rc; Blake2Object *cpy; - if ((cpy = new_Blake2Object(Py_TYPE(self))) == NULL) { + if ((cpy = new_Blake2Object(cls)) == NULL) { return NULL; } diff --git a/Modules/clinic/blake2module.c.h b/Modules/clinic/blake2module.c.h index 9e9cd56e569b24..97d010d03a4b23 100644 --- a/Modules/clinic/blake2module.c.h +++ b/Modules/clinic/blake2module.c.h @@ -434,15 +434,19 @@ PyDoc_STRVAR(_blake2_blake2b_copy__doc__, "Return a copy of the hash object."); #define _BLAKE2_BLAKE2B_COPY_METHODDEF \ - {"copy", (PyCFunction)_blake2_blake2b_copy, METH_NOARGS, _blake2_blake2b_copy__doc__}, + {"copy", _PyCFunction_CAST(_blake2_blake2b_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _blake2_blake2b_copy__doc__}, static PyObject * -_blake2_blake2b_copy_impl(Blake2Object *self); +_blake2_blake2b_copy_impl(Blake2Object *self, PyTypeObject *cls); static PyObject * -_blake2_blake2b_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) +_blake2_blake2b_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _blake2_blake2b_copy_impl((Blake2Object *)self); + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { + PyErr_SetString(PyExc_TypeError, "copy() takes no arguments"); + return NULL; + } + return _blake2_blake2b_copy_impl((Blake2Object *)self, cls); } PyDoc_STRVAR(_blake2_blake2b_update__doc__, @@ -502,4 +506,4 @@ _blake2_blake2b_hexdigest(PyObject *self, PyObject *Py_UNUSED(ignored)) { return _blake2_blake2b_hexdigest_impl((Blake2Object *)self); } -/*[clinic end generated code: output=eed18dcfaf6f7731 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=60a4abbcb8950fe5 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/sha3module.c.h b/Modules/clinic/sha3module.c.h index d6d44bf4c514c0..1f631ff406e25b 100644 --- a/Modules/clinic/sha3module.c.h +++ b/Modules/clinic/sha3module.c.h @@ -100,15 +100,19 @@ PyDoc_STRVAR(_sha3_sha3_224_copy__doc__, "Return a copy of the hash object."); #define _SHA3_SHA3_224_COPY_METHODDEF \ - {"copy", (PyCFunction)_sha3_sha3_224_copy, METH_NOARGS, _sha3_sha3_224_copy__doc__}, + {"copy", _PyCFunction_CAST(_sha3_sha3_224_copy), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sha3_sha3_224_copy__doc__}, static PyObject * -_sha3_sha3_224_copy_impl(SHA3object *self); +_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls); static PyObject * -_sha3_sha3_224_copy(PyObject *self, PyObject *Py_UNUSED(ignored)) +_sha3_sha3_224_copy(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { - return _sha3_sha3_224_copy_impl((SHA3object *)self); + if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { + PyErr_SetString(PyExc_TypeError, "copy() takes no arguments"); + return NULL; + } + return _sha3_sha3_224_copy_impl((SHA3object *)self, cls); } PyDoc_STRVAR(_sha3_sha3_224_digest__doc__, @@ -306,4 +310,4 @@ _sha3_shake_128_hexdigest(PyObject *self, PyObject *const *args, Py_ssize_t narg exit: return return_value; } -/*[clinic end generated code: output=c17e3ec670afe253 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=48be77f8a31e8a3e input=a9049054013a1b77]*/ diff --git a/Modules/sha3module.c b/Modules/sha3module.c index face90a6094beb..5764556bb680f3 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -239,16 +239,17 @@ SHA3_traverse(PyObject *self, visitproc visit, void *arg) /*[clinic input] _sha3.sha3_224.copy + cls: defining_class + Return a copy of the hash object. [clinic start generated code]*/ static PyObject * -_sha3_sha3_224_copy_impl(SHA3object *self) -/*[clinic end generated code: output=6c537411ecdcda4c input=93a44aaebea51ba8]*/ +_sha3_sha3_224_copy_impl(SHA3object *self, PyTypeObject *cls) +/*[clinic end generated code: output=13958b44c244013e input=7134b4dc0a2fbcac]*/ { SHA3object *newobj; - - if ((newobj = newSHA3object(Py_TYPE(self))) == NULL) { + if ((newobj = newSHA3object(cls)) == NULL) { return NULL; } HASHLIB_ACQUIRE_LOCK(self); From e081415c3e390ef3846805f8b25bf28a2b421904 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Tue, 24 Jun 2025 09:08:23 +0100 Subject: [PATCH 026/212] gh-123299: Update 'What's New in Python 3.14' from `3.14` branch (#135616) --- Doc/whatsnew/3.14.rst | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 8b20e42d7d8e07..cbca720b75e96c 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -1757,6 +1757,16 @@ os (Contributed by Cody Maloney in :gh:`129205`.) +os.path +------- + +* The *strict* parameter to :func:`os.path.realpath` accepts a new value, + :data:`os.path.ALLOW_MISSING`. + If used, errors other than :exc:`FileNotFoundError` will be re-raised; + the resulting path can be missing but it will be free of symlinks. + (Contributed by Petr Viktorin for :cve:`2025-4517`.) + + pathlib ------- @@ -1945,6 +1955,28 @@ sysconfig (Contributed by Xuehai Pan in :gh:`131799`.) +tarfile +------- + +* :func:`~tarfile.data_filter` now normalizes symbolic link targets in order to + avoid path traversal attacks. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2025-4138`.) +* :func:`~tarfile.TarFile.extractall` now skips fixing up directory attributes + when a directory was removed or replaced by another kind of file. + (Contributed by Petr Viktorin in :gh:`127987` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + now (re-)apply the extraction filter when substituting a link (hard or + symbolic) with a copy of another archive member, and when fixing up + directory attributes. + The former raises a new exception, :exc:`~tarfile.LinkFallbackError`. + (Contributed by Petr Viktorin for :cve:`2025-4330` and :cve:`2024-12718`.) +* :func:`~tarfile.TarFile.extract` and :func:`~tarfile.TarFile.extractall` + no longer extract rejected members when + :func:`~tarfile.TarFile.errorlevel` is zero. + (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` + and :cve:`2025-4435`.) + + threading --------- @@ -2700,6 +2732,7 @@ New features * :c:func:`PyUnicodeWriter_Discard` * :c:func:`PyUnicodeWriter_Finish` * :c:func:`PyUnicodeWriter_Format` + * :c:func:`PyUnicodeWriter_WriteASCII` * :c:func:`PyUnicodeWriter_WriteChar` * :c:func:`PyUnicodeWriter_WriteRepr` * :c:func:`PyUnicodeWriter_WriteStr` @@ -2976,7 +3009,7 @@ Deprecated :c:func:`PyUnicodeWriter_WriteSubstring(writer, str, start, end) `. * :c:func:`!_PyUnicodeWriter_WriteASCIIString`: replace ``_PyUnicodeWriter_WriteASCIIString(&writer, str)`` with - :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. + :c:func:`PyUnicodeWriter_WriteASCII(writer, str) `. * :c:func:`!_PyUnicodeWriter_WriteLatin1String`: replace ``_PyUnicodeWriter_WriteLatin1String(&writer, str)`` with :c:func:`PyUnicodeWriter_WriteUTF8(writer, str) `. From 98311018b226b7bbb553ed6eb901b91dabaf09c5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 24 Jun 2025 09:49:38 +0100 Subject: [PATCH 027/212] GH-135106: Restrict trashcan to GC'ed objects (GH-135682) --- ...-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst | 2 + Objects/object.c | 76 ++++++------------- 2 files changed, 26 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst new file mode 100644 index 00000000000000..b6e953a7719be1 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-18-16-45-36.gh-issue-135106.cpl6Aq.rst @@ -0,0 +1,2 @@ +Restrict the trashcan mechanism to GC'ed objects and untrack them while in +the trashcan to prevent the GC and trashcan mechanisms conflicting. diff --git a/Objects/object.c b/Objects/object.c index 6a581a19604f9d..4d60128b092c22 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -3034,57 +3034,28 @@ Py_ReprLeave(PyObject *obj) /* Trashcan support. */ -#ifndef Py_GIL_DISABLED -/* We need to store a pointer in the refcount field of - * an object. It is important that we never store 0 (NULL). -* It is also important to not make the object appear immortal, -* or it might be untracked by the cycle GC. */ -static uintptr_t -pointer_to_safe_refcount(void *ptr) -{ - uintptr_t full = (uintptr_t)ptr; - assert((full & 3) == 0); -#if SIZEOF_VOID_P > 4 - uint32_t refcnt = (uint32_t)full; - if (refcnt >= (uint32_t)_Py_IMMORTAL_MINIMUM_REFCNT) { - full = full - ((uintptr_t)_Py_IMMORTAL_MINIMUM_REFCNT) + 1; - } - return full + 2; -#else - // Make the top two bits 0, so it appears mortal. - return (full >> 2) + 1; -#endif -} - -static void * -safe_refcount_to_pointer(uintptr_t refcnt) -{ -#if SIZEOF_VOID_P > 4 - if (refcnt & 1) { - refcnt += _Py_IMMORTAL_MINIMUM_REFCNT - 1; - } - return (void *)(refcnt - 2); -#else - return (void *)((refcnt -1) << 2); -#endif -} -#endif - /* Add op to the gcstate->trash_delete_later list. Called when the current - * call-stack depth gets large. op must be a currently untracked gc'ed - * object, with refcount 0. Py_DECREF must already have been called on it. + * call-stack depth gets large. op must be a gc'ed object, with refcount 0. + * Py_DECREF must already have been called on it. */ void _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op) { _PyObject_ASSERT(op, Py_REFCNT(op) == 0); + PyTypeObject *tp = Py_TYPE(op); + assert(tp->tp_flags & Py_TPFLAGS_HAVE_GC); + int tracked = 0; + if (tp->tp_is_gc == NULL || tp->tp_is_gc(op)) { + tracked = _PyObject_GC_IS_TRACKED(op); + if (tracked) { + _PyObject_GC_UNTRACK(op); + } + } + uintptr_t tagged_ptr = ((uintptr_t)tstate->delete_later) | tracked; #ifdef Py_GIL_DISABLED - op->ob_tid = (uintptr_t)tstate->delete_later; + op->ob_tid = tagged_ptr; #else - /* Store the delete_later pointer in the refcnt field. */ - uintptr_t refcnt = pointer_to_safe_refcount(tstate->delete_later); - *((uintptr_t*)op) = refcnt; - assert(!_Py_IsImmortal(op)); + _Py_AS_GC(op)->_gc_next = tagged_ptr; #endif tstate->delete_later = op; } @@ -3099,17 +3070,17 @@ _PyTrash_thread_destroy_chain(PyThreadState *tstate) destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_GIL_DISABLED - tstate->delete_later = (PyObject*) op->ob_tid; + uintptr_t tagged_ptr = op->ob_tid; op->ob_tid = 0; _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, _Py_REF_MERGED); #else - /* Get the delete_later pointer from the refcnt field. - * See _PyTrash_thread_deposit_object(). */ - uintptr_t refcnt = *((uintptr_t*)op); - tstate->delete_later = safe_refcount_to_pointer(refcnt); - op->ob_refcnt = 0; + uintptr_t tagged_ptr = _Py_AS_GC(op)->_gc_next; + _Py_AS_GC(op)->_gc_next = 0; #endif - + tstate->delete_later = (PyObject *)(tagged_ptr & ~1); + if (tagged_ptr & 1) { + _PyObject_GC_TRACK(op); + } /* Call the deallocator directly. This used to try to * fool Py_DECREF into calling it indirectly, but * Py_DECREF was already called on this object, and in @@ -3183,10 +3154,11 @@ void _Py_Dealloc(PyObject *op) { PyTypeObject *type = Py_TYPE(op); + unsigned long gc_flag = type->tp_flags & Py_TPFLAGS_HAVE_GC; destructor dealloc = type->tp_dealloc; PyThreadState *tstate = _PyThreadState_GET(); intptr_t margin = _Py_RecursionLimit_GetMargin(tstate); - if (margin < 2) { + if (margin < 2 && gc_flag) { _PyTrash_thread_deposit_object(tstate, (PyObject *)op); return; } @@ -3232,7 +3204,7 @@ _Py_Dealloc(PyObject *op) Py_XDECREF(old_exc); Py_DECREF(type); #endif - if (tstate->delete_later && margin >= 4) { + if (tstate->delete_later && margin >= 4 && gc_flag) { _PyTrash_thread_destroy_chain(tstate); } } From 576fe7ae837c92e1fc01459cee8f62e66355b9b5 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 24 Jun 2025 12:21:35 +0200 Subject: [PATCH 028/212] gh-135494: Fix python -m test --pgo -x test_re (#135713) Fix regrtest to support excluding tests from --pgo tests. --- Lib/test/libregrtest/main.py | 16 ++++++++++------ Lib/test/test_regrtest.py | 11 +++++++++++ ...025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst | 2 ++ 3 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 0d9c059a93872d..a2d01b157ac89b 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -190,6 +190,12 @@ def find_tests(self, tests: TestList | None = None) -> tuple[TestTuple, TestList strip_py_suffix(tests) + exclude_tests = set() + if self.exclude: + for arg in self.cmdline_args: + exclude_tests.add(arg) + self.cmdline_args = [] + if self.pgo: # add default PGO tests if no tests are specified setup_pgo_tests(self.cmdline_args, self.pgo_extended) @@ -200,17 +206,15 @@ def find_tests(self, tests: TestList | None = None) -> tuple[TestTuple, TestList if self.tsan_parallel: setup_tsan_parallel_tests(self.cmdline_args) - exclude_tests = set() - if self.exclude: - for arg in self.cmdline_args: - exclude_tests.add(arg) - self.cmdline_args = [] - alltests = findtests(testdir=self.test_dir, exclude=exclude_tests) if not self.fromfile: selected = tests or self.cmdline_args + if exclude_tests: + # Support "--pgo/--tsan -x test_xxx" command + selected = [name for name in selected + if name not in exclude_tests] if selected: selected = split_test_packages(selected) else: diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index a43d2678ebd3be..5bc3c5924b07fb 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -2346,6 +2346,17 @@ def check(output): output = self.run_tests('-j1', '-v', testname, env=env, isolated=False) check(output) + def test_pgo_exclude(self): + # Get PGO tests + output = self.run_tests('--pgo', '--list-tests') + pgo_tests = output.strip().split() + + # Exclude test_re + output = self.run_tests('--pgo', '--list-tests', '-x', 'test_re') + tests = output.strip().split() + self.assertNotIn('test_re', tests) + self.assertEqual(len(tests), len(pgo_tests) - 1) + class TestUtils(unittest.TestCase): def test_format_duration(self): diff --git a/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst new file mode 100644 index 00000000000000..832d1fe033e229 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-19-15-29-38.gh-issue-135494.FVl9a0.rst @@ -0,0 +1,2 @@ +Fix regrtest to support excluding tests from ``--pgo`` tests. Patch by +Victor Stinner. From 1043b3e951b507023c6ab369f22f0b7a3bef6fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 24 Jun 2025 13:09:46 +0200 Subject: [PATCH 029/212] gh-135487: fix `reprlib.Repr.repr_int` when given very large integers (#135506) --- Lib/reprlib.py | 17 +++++++- Lib/test/test_reprlib.py | 40 +++++++++++++++---- ...-06-14-12-06-55.gh-issue-135487.KdVFff.rst | 2 + 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst diff --git a/Lib/reprlib.py b/Lib/reprlib.py index 441d1be4bdede2..ab18247682b69a 100644 --- a/Lib/reprlib.py +++ b/Lib/reprlib.py @@ -181,7 +181,22 @@ def repr_str(self, x, level): return s def repr_int(self, x, level): - s = builtins.repr(x) # XXX Hope this isn't too slow... + try: + s = builtins.repr(x) + except ValueError as exc: + assert 'sys.set_int_max_str_digits()' in str(exc) + # Those imports must be deferred due to Python's build system + # where the reprlib module is imported before the math module. + import math, sys + # Integers with more than sys.get_int_max_str_digits() digits + # are rendered differently as their repr() raises a ValueError. + # See https://github.com/python/cpython/issues/135487. + k = 1 + int(math.log10(abs(x))) + # Note: math.log10(abs(x)) may be overestimated or underestimated, + # but for simplicity, we do not compute the exact number of digits. + max_digits = sys.get_int_max_str_digits() + return (f'<{x.__class__.__name__} instance with roughly {k} ' + f'digits (limit at {max_digits}) at 0x{id(x):x}>') if len(s) > self.maxlong: i = max(0, (self.maxlong-3)//2) j = max(0, self.maxlong-3-i) diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index 16623654c29b28..d5631efcdb75b7 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -151,14 +151,38 @@ def test_frozenset(self): eq(r(frozenset({1, 2, 3, 4, 5, 6, 7})), "frozenset({1, 2, 3, 4, 5, 6, ...})") def test_numbers(self): - eq = self.assertEqual - eq(r(123), repr(123)) - eq(r(123), repr(123)) - eq(r(1.0/3), repr(1.0/3)) - - n = 10**100 - expected = repr(n)[:18] + "..." + repr(n)[-19:] - eq(r(n), expected) + for x in [123, 1.0 / 3]: + self.assertEqual(r(x), repr(x)) + + max_digits = sys.get_int_max_str_digits() + for k in [100, max_digits - 1]: + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + expected = repr(n)[:18] + "..." + repr(n)[-19:] + self.assertEqual(r(n), expected) + + def re_msg(n, d): + return (rf'<{n.__class__.__name__} instance with roughly {d} ' + rf'digits \(limit at {max_digits}\) at 0x[a-f0-9]+>') + + k = max_digits + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + + for k in [max_digits + 1, 2 * max_digits]: + self.assertGreater(k, 100) + with self.subTest(f'10 ** {k}', k=k): + n = 10 ** k + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) + with self.subTest(f'10 ** {k} - 1', k=k): + n = 10 ** k - 1 + # Here, since math.log10(n) == math.log10(n-1), + # the number of digits of n - 1 is overestimated. + self.assertRaises(ValueError, repr, n) + self.assertRegex(r(n), re_msg(n, k + 1)) def test_instance(self): eq = self.assertEqual diff --git a/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst new file mode 100644 index 00000000000000..3ef51fa31dfac9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-14-12-06-55.gh-issue-135487.KdVFff.rst @@ -0,0 +1,2 @@ +Fix :meth:`!reprlib.Repr.repr_int` when given integers with more than +:func:`sys.get_int_max_str_digits` digits. Patch by Bénédikt Tran. From 1bf008929bc84817e8aa1420d587f75ad096f566 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 24 Jun 2025 19:33:25 +0300 Subject: [PATCH 030/212] gh-135878: Fix crash in `types.SimpleNamespace.__repr__` (#135889) Co-authored-by: Peter Bierma --- .../Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst | 3 +++ Objects/namespaceobject.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst diff --git a/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst new file mode 100644 index 00000000000000..969cf2dfa40878 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-24-14-43-24.gh-issue-135878.Db4roX.rst @@ -0,0 +1,3 @@ +Fixes a crash of :class:`types.SimpleNamespace` on :term:`free threading` builds, +when several threads were calling its :meth:`~object.__repr__` method at the +same time. diff --git a/Objects/namespaceobject.c b/Objects/namespaceobject.c index caebe6bf543567..0fc2bcea4cb06e 100644 --- a/Objects/namespaceobject.c +++ b/Objects/namespaceobject.c @@ -124,9 +124,10 @@ namespace_repr(PyObject *ns) if (PyUnicode_Check(key) && PyUnicode_GET_LENGTH(key) > 0) { PyObject *value, *item; - value = PyDict_GetItemWithError(d, key); - if (value != NULL) { + int has_key = PyDict_GetItemRef(d, key, &value); + if (has_key == 1) { item = PyUnicode_FromFormat("%U=%R", key, value); + Py_DECREF(value); if (item == NULL) { loop_error = 1; } @@ -135,7 +136,7 @@ namespace_repr(PyObject *ns) Py_DECREF(item); } } - else if (PyErr_Occurred()) { + else if (has_key < 0) { loop_error = 1; } } From c2e0003441893d42972c15650275f3b831fdee9d Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 24 Jun 2025 10:02:50 -0700 Subject: [PATCH 031/212] gh-135805: Document the X option and env var for controlling thread-local bytecode (#135868) Document the X option and env var for controlling thread-local bytecode. --- Doc/using/cmdline.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 40a46a62031ef7..a5867b489e0053 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -669,6 +669,13 @@ Miscellaneous options .. versionadded:: 3.14 + * :samp:`-X tlbc={0,1}` enables (1, the default) or disables (0) thread-local + bytecode in builds configured with :option:`--disable-gil`. When disabled, + this also disables the specializing interpreter. See also + :envvar:`PYTHON_TLBC`. + + .. versionadded:: 3.14 + It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -1302,6 +1309,16 @@ conflict. .. versionadded:: 3.13 +.. envvar:: PYTHON_TLBC + + If set to ``1`` enables thread-local bytecode. If set to ``0`` thread-local + bytecode and the specializing interpreter are disabled. Only applies to + builds configured with :option:`--disable-gil`. + + See also the :option:`-X tlbc <-X>` command-line option. + + .. versionadded:: 3.14 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ From d76582d2205b8cc517340be379ca8cbeb2c6780c Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Tue, 24 Jun 2025 14:53:14 -0400 Subject: [PATCH 032/212] gh-135855: Raise TypeError When Passing Non-dict Object to `_interpreters.set___main___attrs` (gh-135856) --- Lib/test/test__interpreters.py | 15 +++++++++++++ ...-06-23-10-19-11.gh-issue-135855.-J0AGF.rst | 3 +++ Modules/_interpretersmodule.c | 22 +++++++++---------- Python/crossinterp.c | 1 + 4 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst diff --git a/Lib/test/test__interpreters.py b/Lib/test/test__interpreters.py index ad3ebbfdff64a7..a32d5d81d2bf2d 100644 --- a/Lib/test/test__interpreters.py +++ b/Lib/test/test__interpreters.py @@ -485,6 +485,21 @@ def test_signatures(self): msg = r'_interpreters.run_func\(\) argument 3 must be dict, not int' with self.assertRaisesRegex(TypeError, msg): _interpreters.run_func(self.id, lambda: None, shared=1) + # See https://github.com/python/cpython/issues/135855 + msg = r'_interpreters.set___main___attrs\(\) argument 2 must be dict, not int' + with self.assertRaisesRegex(TypeError, msg): + _interpreters.set___main___attrs(self.id, 1) + + def test_invalid_shared_none(self): + msg = r'must be dict, not None' + with self.assertRaisesRegex(TypeError, msg): + _interpreters.exec(self.id, 'a', shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.run_string(self.id, 'a', shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.run_func(self.id, lambda: None, shared=None) + with self.assertRaisesRegex(TypeError, msg): + _interpreters.set___main___attrs(self.id, None) def test_invalid_shared_encoding(self): # See https://github.com/python/cpython/issues/127196 diff --git a/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst new file mode 100644 index 00000000000000..fcf495bdceb168 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-23-10-19-11.gh-issue-135855.-J0AGF.rst @@ -0,0 +1,3 @@ +Raise :exc:`TypeError` instead of :exc:`SystemError` when +:func:`!_interpreters.set___main___attrs` is passed a non-dict object. +Patch by Brian Schubert. diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index e201ccd53daf8a..b920c32474f4e6 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1039,8 +1039,8 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) PyObject *id, *updates; int restricted = 0; if (!PyArg_ParseTupleAndKeywords(args, kwargs, - "OO|$p:" MODULE_NAME_STR ".set___main___attrs", - kwlist, &id, &updates, &restricted)) + "OO!|$p:" MODULE_NAME_STR ".set___main___attrs", + kwlist, &id, &PyDict_Type, &updates, &restricted)) { return NULL; } @@ -1054,16 +1054,14 @@ interp_set___main___attrs(PyObject *self, PyObject *args, PyObject *kwargs) } // Check the updates. - if (updates != Py_None) { - Py_ssize_t size = PyObject_Size(updates); - if (size < 0) { - return NULL; - } - if (size == 0) { - PyErr_SetString(PyExc_ValueError, - "arg 2 must be a non-empty mapping"); - return NULL; - } + Py_ssize_t size = PyDict_Size(updates); + if (size < 0) { + return NULL; + } + if (size == 0) { + PyErr_SetString(PyExc_ValueError, + "arg 2 must be a non-empty dict"); + return NULL; } _PyXI_session *session = _PyXI_NewSession(); diff --git a/Python/crossinterp.c b/Python/crossinterp.c index c44bcd559000b3..16a23f0351cd26 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -2617,6 +2617,7 @@ _PyXI_Enter(_PyXI_session *session, // Convert the attrs for cross-interpreter use. _PyXI_namespace *sharedns = NULL; if (nsupdates != NULL) { + assert(PyDict_Check(nsupdates)); Py_ssize_t len = PyDict_Size(nsupdates); if (len < 0) { if (result != NULL) { From f49487fdee69d19230a729af8089f77d337c16a2 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 24 Jun 2025 14:41:41 -0700 Subject: [PATCH 033/212] GH-90117: Check for list and tuple before MappingView in pprint (GH-135779) --- Lib/pprint.py | 68 +++++++++---------- ...5-06-20-17-06-59.gh-issue-90117.GYWVrn.rst | 1 + 2 files changed, 35 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst diff --git a/Lib/pprint.py b/Lib/pprint.py index 1e611481b51ac0..92a2c543ac279c 100644 --- a/Lib/pprint.py +++ b/Lib/pprint.py @@ -653,6 +653,40 @@ def _safe_repr(self, object, context, maxlevels, level): del context[objid] return "{%s}" % ", ".join(components), readable, recursive + if (issubclass(typ, list) and r is list.__repr__) or \ + (issubclass(typ, tuple) and r is tuple.__repr__): + if issubclass(typ, list): + if not object: + return "[]", True, False + format = "[%s]" + elif len(object) == 1: + format = "(%s,)" + else: + if not object: + return "()", True, False + format = "(%s)" + objid = id(object) + if maxlevels and level >= maxlevels: + return format % "...", False, objid in context + if objid in context: + return _recursion(object), False, True + context[objid] = 1 + readable = True + recursive = False + components = [] + append = components.append + level += 1 + for o in object: + orepr, oreadable, orecur = self.format( + o, context, maxlevels, level) + append(orepr) + if not oreadable: + readable = False + if orecur: + recursive = True + del context[objid] + return format % ", ".join(components), readable, recursive + if issubclass(typ, _collections.abc.MappingView) and r in self._view_reprs: objid = id(object) if maxlevels and level >= maxlevels: @@ -689,40 +723,6 @@ def _safe_repr(self, object, context, maxlevels, level): del context[objid] return typ.__name__ + '([%s])' % ", ".join(components), readable, recursive - if (issubclass(typ, list) and r is list.__repr__) or \ - (issubclass(typ, tuple) and r is tuple.__repr__): - if issubclass(typ, list): - if not object: - return "[]", True, False - format = "[%s]" - elif len(object) == 1: - format = "(%s,)" - else: - if not object: - return "()", True, False - format = "(%s)" - objid = id(object) - if maxlevels and level >= maxlevels: - return format % "...", False, objid in context - if objid in context: - return _recursion(object), False, True - context[objid] = 1 - readable = True - recursive = False - components = [] - append = components.append - level += 1 - for o in object: - orepr, oreadable, orecur = self.format( - o, context, maxlevels, level) - append(orepr) - if not oreadable: - readable = False - if orecur: - recursive = True - del context[objid] - return format % ", ".join(components), readable, recursive - rep = repr(object) return rep, (rep and not rep.startswith('<')), False diff --git a/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst new file mode 100644 index 00000000000000..2bb15cb6d9c61c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-20-17-06-59.gh-issue-90117.GYWVrn.rst @@ -0,0 +1 @@ +Speed up :mod:`pprint` for :class:`list` and :class:`tuple`. From 772467aaa1835fa8741892116daa9dcb877848d1 Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Wed, 25 Jun 2025 06:42:38 +0100 Subject: [PATCH 034/212] gh-91555: Revert disabling of logger while handling log record. (GH-135858) Revert "gh-91555: disable logger while handling log record (GH-131812)" This reverts commit 2561e148ec985755baa3984b91fd0bfc089b283c. --- Lib/logging/__init__.py | 28 ++----- Lib/test/test_logging.py | 83 ------------------- ...5-03-30-16-42-38.gh-issue-91555.ShVtwW.rst | 2 - 3 files changed, 8 insertions(+), 105 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index 5c3c44249345f3..c5860d53b1bdff 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1475,8 +1475,6 @@ class Logger(Filterer): level, and "input.csv", "input.xls" and "input.gnu" for the sub-levels. There is no arbitrary limit to the depth of nesting. """ - _tls = threading.local() - def __init__(self, name, level=NOTSET): """ Initialize the logger with a name and an optional level. @@ -1673,19 +1671,14 @@ def handle(self, record): This method is used for unpickled records received from a socket, as well as those created locally. Logger-level filtering is applied. """ - if self._is_disabled(): + if self.disabled: return - - self._tls.in_progress = True - try: - maybe_record = self.filter(record) - if not maybe_record: - return - if isinstance(maybe_record, LogRecord): - record = maybe_record - self.callHandlers(record) - finally: - self._tls.in_progress = False + maybe_record = self.filter(record) + if not maybe_record: + return + if isinstance(maybe_record, LogRecord): + record = maybe_record + self.callHandlers(record) def addHandler(self, hdlr): """ @@ -1773,7 +1766,7 @@ def isEnabledFor(self, level): """ Is this logger enabled for level 'level'? """ - if self._is_disabled(): + if self.disabled: return False try: @@ -1823,11 +1816,6 @@ def _hierlevel(logger): if isinstance(item, Logger) and item.parent is self and _hierlevel(item) == 1 + _hierlevel(item.parent)) - def _is_disabled(self): - # We need to use getattr as it will only be set the first time a log - # message is recorded on any given thread - return self.disabled or getattr(self._tls, 'in_progress', False) - def __repr__(self): level = getLevelName(self.getEffectiveLevel()) return '<%s %s (%s)>' % (self.__class__.__name__, self.name, level) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index e672dfcbb465cc..3819965ed2cbf9 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -4214,89 +4214,6 @@ def __init__(self, *args, **kwargs): handler = logging.getHandlerByName('custom') self.assertEqual(handler.custom_kwargs, custom_kwargs) - # See gh-91555 and gh-90321 - @support.requires_subprocess() - def test_deadlock_in_queue(self): - queue = multiprocessing.Queue() - handler = logging.handlers.QueueHandler(queue) - logger = multiprocessing.get_logger() - level = logger.level - try: - logger.setLevel(logging.DEBUG) - logger.addHandler(handler) - logger.debug("deadlock") - finally: - logger.setLevel(level) - logger.removeHandler(handler) - - def test_recursion_in_custom_handler(self): - class BadHandler(logging.Handler): - def __init__(self): - super().__init__() - def emit(self, record): - logger.debug("recurse") - logger = logging.getLogger("test_recursion_in_custom_handler") - logger.addHandler(BadHandler()) - logger.setLevel(logging.DEBUG) - logger.debug("boom") - - @threading_helper.requires_working_threading() - def test_thread_supression_noninterference(self): - lock = threading.Lock() - logger = logging.getLogger("test_thread_supression_noninterference") - - # Block on the first call, allow others through - # - # NOTE: We need to bypass the base class's lock, otherwise that will - # block multiple calls to the same handler itself. - class BlockOnceHandler(TestHandler): - def __init__(self, barrier): - super().__init__(support.Matcher()) - self.barrier = barrier - - def createLock(self): - self.lock = None - - def handle(self, record): - self.emit(record) - - def emit(self, record): - if self.barrier: - barrier = self.barrier - self.barrier = None - barrier.wait() - with lock: - pass - super().emit(record) - logger.info("blow up if not supressed") - - barrier = threading.Barrier(2) - handler = BlockOnceHandler(barrier) - logger.addHandler(handler) - logger.setLevel(logging.DEBUG) - - t1 = threading.Thread(target=logger.debug, args=("1",)) - with lock: - - # Ensure first thread is blocked in the handler, hence supressing logging... - t1.start() - barrier.wait() - - # ...but the second thread should still be able to log... - t2 = threading.Thread(target=logger.debug, args=("2",)) - t2.start() - t2.join(timeout=3) - - self.assertEqual(len(handler.buffer), 1) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='2')) - - # The first thread should still be blocked here - self.assertTrue(t1.is_alive()) - - # Now the lock has been released the first thread should complete - t1.join() - self.assertEqual(len(handler.buffer), 2) - self.assertTrue(handler.matches(levelno=logging.DEBUG, message='1')) class ManagerTest(BaseTest): def test_manager_loggerclass(self): diff --git a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst b/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst deleted file mode 100644 index e8f5ba56fcc23d..00000000000000 --- a/Misc/NEWS.d/next/Library/2025-03-30-16-42-38.gh-issue-91555.ShVtwW.rst +++ /dev/null @@ -1,2 +0,0 @@ -Ignore log messages generated during handling of log messages, to avoid -deadlock or infinite recursion. From 412082d59691ea6dd0608d2e52a51fd2ffd7b2c6 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Jun 2025 00:06:32 -0700 Subject: [PATCH 035/212] GH-133136: Revise QSBR to reduce excess memory held (gh-135473) The free threading build uses QSBR to delay the freeing of dictionary keys and list arrays when the objects are accessed by multiple threads in order to allow concurrent reads to proceed with holding the object lock. The requests are processed in batches to reduce execution overhead, but for large memory blocks this can lead to excess memory usage. Take into account the size of the memory block when deciding when to process QSBR requests. Also track the amount of memory being held by QSBR for mimalloc pages. Advance the write sequence if this memory exceeds a limit. Advancing the sequence will allow it to be freed more quickly. Process the held QSBR items from the "eval breaker", rather than from `_PyMem_FreeDelayed()`. This gives a higher chance that the global read sequence has advanced enough so that items can be freed. Co-authored-by: Sam Gross --- Include/internal/pycore_pymem.h | 2 +- Include/internal/pycore_qsbr.h | 31 ++++-- ...-06-03-21-06-22.gh-issue-133136.Usnvri.rst | 2 + Objects/codeobject.c | 2 +- Objects/dictobject.c | 4 +- Objects/listobject.c | 3 +- Objects/obmalloc.c | 96 +++++++++++++++++-- Python/ceval_gil.c | 4 + Python/qsbr.c | 12 +-- 9 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index e669a30b072d18..f3f2ae0a140828 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Periodically process delayed free requests. extern void _PyMem_ProcessDelayed(PyThreadState *tstate); diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..1f9b3fcf777493 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -48,8 +48,21 @@ struct _qsbr_thread_state { // Thread state (or NULL) PyThreadState *tstate; - // Used to defer advancing write sequence a fixed number of times - int deferrals; + // Number of held items added by this thread since the last write sequence + // advance + int deferred_count; + + // Estimate for the amount of memory that is held by this thread since + // the last write sequence advance + size_t deferred_memory; + + // Amount of memory in mimalloc pages deferred from collection. When + // deferred, they are prevented from being used for a different size class + // and in a different thread. + size_t deferred_page_memory; + + // True if the deferred memory frees should be processed. + bool should_process; // Is this thread state allocated? bool allocated; @@ -109,11 +122,17 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal) extern uint64_t _Py_qsbr_advance(struct _qsbr_shared *shared); -// Batches requests to advance the write sequence. This advances the write -// sequence every N calls, which reduces overhead but increases time to -// reclamation. Returns the new goal. +// Return the next value for the write sequence (current plus the increment). extern uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr); +_Py_qsbr_shared_next(struct _qsbr_shared *shared); + +// Return true if deferred memory frees held by QSBR should be processed to +// determine if they can be safely freed. +static inline bool +_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr) +{ + return qsbr->should_process; +} // Have the read sequences advanced to the given goal? If this returns true, // it safe to reclaim any memory tagged with the goal (or earlier goal). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000000..a9501c13c95b3a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3f53d4cfeb20ac..91772bc9d19098 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3369,7 +3369,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 72901377cbb3da..6b7b150f0e2f2c 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d4b8327cb73b37..deb7fd957e57dd 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -124,6 +124,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page) } +#ifdef Py_GIL_DISABLED + +// If we are deferring collection of more than this amount of memory for +// mimalloc pages, advance the write sequence. Advancing allows these +// pages to be re-used in a different thread or for a different size class. +#define QSBR_PAGE_MEM_LIMIT 4096*20 + +// Return true if the global write sequence should be advanced for a mimalloc +// page that is deferred from collection. +static bool +should_advance_qsbr_for_page(struct _qsbr_thread_state *qsbr, mi_page_t *page) +{ + size_t bsize = mi_page_block_size(page); + size_t page_size = page->capacity*bsize; + if (page_size > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + qsbr->deferred_page_memory += page_size; + if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) { + qsbr->deferred_page_memory = 0; + return true; + } + return false; +} +#endif + static bool _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) { @@ -139,7 +166,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force) _PyMem_mi_page_clear_qsbr(page); page->retire_expire = 0; - page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr); + + if (should_advance_qsbr_for_page(tstate->qsbr, page)) { + page->qsbr_goal = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + page->qsbr_goal = _Py_qsbr_shared_next(tstate->qsbr->shared); + } + llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node); return false; } @@ -1141,8 +1175,44 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } + +#ifdef Py_GIL_DISABLED + +// For deferred advance on free: the number of deferred items before advancing +// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally +// want to process a chunk before it overflows. +#define QSBR_DEFERRED_LIMIT 127 + +// If the deferred memory exceeds 1 MiB, advance the write sequence. This +// helps limit memory usage due to QSBR delaying frees too long. +#define QSBR_FREE_MEM_LIMIT 1024*1024 + +// Return true if the global write sequence should be advanced for a deferred +// memory free. +static bool +should_advance_qsbr_for_free(struct _qsbr_thread_state *qsbr, size_t size) +{ + if (size > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + qsbr->deferred_count++; + qsbr->deferred_memory += size; + if (qsbr->deferred_count > QSBR_DEFERRED_LIMIT || + qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) { + qsbr->deferred_count = 0; + qsbr->deferred_memory = 0; + qsbr->should_process = true; + return true; + } + return false; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1270,32 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + if (should_advance_qsbr_for_free(tstate->qsbr, size)) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_shared_next(tstate->qsbr->shared); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + // Normally the processing of delayed items is done from the eval + // breaker. Processing here is a safety measure to ensure too much + // work does not accumulate. _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1305,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + // We use 0 as the size since we don't have an easy way to know the + // actual size. If we are freeing many objects, the write sequence + // will be advanced due to QSBR_DEFERRED_LIMIT. + free_delayed(((uintptr_t)ptr)|0x01, 0); } } #endif @@ -1317,6 +1399,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) PyInterpreterState *interp = tstate->interp; _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate_impl->qsbr->should_process = false; + // Process thread-local work process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL); diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 57d8f68b000b60..aa68371ac8febf 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1387,6 +1387,10 @@ _Py_HandlePending(PyThreadState *tstate) _Py_unset_eval_breaker_bit(tstate, _PY_EVAL_EXPLICIT_MERGE_BIT); _Py_brc_merge_refcounts(tstate); } + /* Process deferred memory frees held by QSBR */ + if (_Py_qsbr_should_process(((_PyThreadStateImpl *)tstate)->qsbr)) { + _PyMem_ProcessDelayed(tstate); + } #endif /* GC scheduled to run */ diff --git a/Python/qsbr.c b/Python/qsbr.c index afa03776c26b6f..c992c285cb13e4 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -41,10 +41,6 @@ // Starting size of the array of qsbr thread states #define MIN_ARRAY_SIZE 8 -// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing -// the write sequence. -#define QSBR_DEFERRED_LIMIT 10 - // Allocate a QSBR thread state from the freelist static struct _qsbr_thread_state * qsbr_allocate(struct _qsbr_shared *shared) @@ -117,13 +113,9 @@ _Py_qsbr_advance(struct _qsbr_shared *shared) } uint64_t -_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr) +_Py_qsbr_shared_next(struct _qsbr_shared *shared) { - if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) { - return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR; - } - qsbr->deferrals = 0; - return _Py_qsbr_advance(qsbr->shared); + return _Py_qsbr_shared_current(shared) + QSBR_INCR; } static uint64_t From cd186902a244fb9c37b9d42b7d33deb0926cd624 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Wed, 25 Jun 2025 04:44:55 -0400 Subject: [PATCH 036/212] gh-135755: Docs: C API: Document missing `PyFunction_GET*` macros (GH-135762) Co-authored-by: Brian Schubert Co-authored-by: Petr Viktorin --- Doc/c-api/function.rst | 20 ++++++++++++++++++++ Doc/data/refcounts.dat | 24 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 63b78f677674e9..5fb8567ef8c95f 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -95,6 +95,13 @@ There are a few functions specific to Python functions. .. versionadded:: 3.12 + +.. c:function:: PyObject* PyFunction_GetKwDefaults(PyObject *op) + + Return the keyword-only argument default values of the function object *op*. This can be a + dictionary of arguments or ``NULL``. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -123,6 +130,19 @@ There are a few functions specific to Python functions. Raises :exc:`SystemError` and returns ``-1`` on failure. +.. c:function:: PyObject *PyFunction_GET_CODE(PyObject *op) + PyObject *PyFunction_GET_GLOBALS(PyObject *op) + PyObject *PyFunction_GET_MODULE(PyObject *op) + PyObject *PyFunction_GET_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_KW_DEFAULTS(PyObject *op) + PyObject *PyFunction_GET_CLOSURE(PyObject *op) + PyObject *PyFunction_GET_ANNOTATIONS(PyObject *op) + + These functions are similar to their ``PyFunction_Get*`` counterparts, but + do not do type checking. Passing anything other than an instance of + :c:data:`PyFunction_Type` is undefined behavior. + + .. c:function:: int PyFunction_AddWatcher(PyFunction_WatchCallback callback) Register *callback* as a function watcher for the current interpreter. diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 99cc823c0c3772..144c5608e07426 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -963,21 +963,45 @@ PyFunction_Check:PyObject*:o:0: PyFunction_GetAnnotations:PyObject*::0: PyFunction_GetAnnotations:PyObject*:op:0: +PyFunction_GET_ANNOTATIONS:PyObject*::0: +PyFunction_GET_ANNOTATIONS:PyObject*:op:0: + PyFunction_GetClosure:PyObject*::0: PyFunction_GetClosure:PyObject*:op:0: +PyFunction_GET_CLOSURE:PyObject*::0: +PyFunction_GET_CLOSURE:PyObject*:op:0: + PyFunction_GetCode:PyObject*::0: PyFunction_GetCode:PyObject*:op:0: +PyFunction_GET_CODE:PyObject*::0: +PyFunction_GET_CODE:PyObject*:op:0: + PyFunction_GetDefaults:PyObject*::0: PyFunction_GetDefaults:PyObject*:op:0: +PyFunction_GET_DEFAULTS:PyObject*::0: +PyFunction_GET_DEFAULTS:PyObject*:op:0: + +PyFunction_GetKwDefaults:PyObject*::0: +PyFunction_GetKwDefaults:PyObject*:op:0: + +PyFunction_GET_KW_DEFAULTS:PyObject*::0: +PyFunction_GET_KW_DEFAULTS:PyObject*:op:0: + PyFunction_GetGlobals:PyObject*::0: PyFunction_GetGlobals:PyObject*:op:0: +PyFunction_GET_GLOBALS:PyObject*::0: +PyFunction_GET_GLOBALS:PyObject*:op:0: + PyFunction_GetModule:PyObject*::0: PyFunction_GetModule:PyObject*:op:0: +PyFunction_GET_MODULE:PyObject*::0: +PyFunction_GET_MODULE:PyObject*:op:0: + PyFunction_New:PyObject*::+1: PyFunction_New:PyObject*:code:+1: PyFunction_New:PyObject*:globals:+1: From c5c385104e281fa801ba4539748ebf5ce4d78bd2 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 25 Jun 2025 11:54:42 +0300 Subject: [PATCH 037/212] gh-135839: Fix `module_traverse` and `module_clear` in `_interpchannelsmodule` (#135840) --- Modules/_interpchannelsmodule.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index ee5e2b005e0a5b..9c1f8615161275 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -3614,8 +3614,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -3625,8 +3624,7 @@ module_clear(PyObject *mod) assert(state != NULL); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void From 37b7efcbaef307c5e8f995dac59fa3122770ff4e Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 25 Jun 2025 13:03:05 +0200 Subject: [PATCH 038/212] gh-135755: Move SPECIAL_ constants to a private header (GH-135922) Macros without a `Py`/`_Py` prefix should not be defined in public headers. --- Include/ceval.h | 7 ------- Include/internal/pycore_ceval.h | 7 +++++++ Modules/_opcode.c | 2 +- Python/ceval.c | 2 +- Python/codegen.c | 1 + 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 32ab38972e548f..e9df8684996e23 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -133,13 +133,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); #define FVS_MASK 0x4 #define FVS_HAVE_SPEC 0x4 -/* Special methods used by LOAD_SPECIAL */ -#define SPECIAL___ENTER__ 0 -#define SPECIAL___EXIT__ 1 -#define SPECIAL___AENTER__ 2 -#define SPECIAL___AEXIT__ 3 -#define SPECIAL_MAX 3 - #ifndef Py_LIMITED_API # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 18623cc8f1c945..cc2defbdf77821 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -366,6 +366,13 @@ extern int _PyRunRemoteDebugger(PyThreadState *tstate); PyAPI_FUNC(_PyStackRef) _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame* frame, _PyStackRef iter, _PyStackRef *index_ptr); +/* Special methods used by LOAD_SPECIAL */ +#define SPECIAL___ENTER__ 0 +#define SPECIAL___EXIT__ 1 +#define SPECIAL___AENTER__ 2 +#define SPECIAL___AEXIT__ 3 +#define SPECIAL_MAX 3 + #ifdef __cplusplus } #endif diff --git a/Modules/_opcode.c b/Modules/_opcode.c index c295f7b3152577..ef271b6ef566b9 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -5,7 +5,7 @@ #include "Python.h" #include "compile.h" #include "opcode.h" -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL_MAX #include "pycore_code.h" #include "pycore_compile.h" #include "pycore_intrinsics.h" diff --git a/Python/ceval.c b/Python/ceval.c index 3da6f61f5acde5..d1de4875656fd3 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,7 +8,7 @@ #include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_cell.h" // PyCell_GetRef() -#include "pycore_ceval.h" +#include "pycore_ceval.h" // SPECIAL___ENTER__ #include "pycore_code.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS diff --git a/Python/codegen.c b/Python/codegen.c index a8744a533e63c7..317b2e881e6818 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -28,6 +28,7 @@ #include "pycore_pystate.h" // _Py_GetConfig() #include "pycore_symtable.h" // PySTEntryObject #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString +#include "pycore_ceval.h" // SPECIAL___ENTER__ #define NEED_OPCODE_METADATA #include "pycore_opcode_metadata.h" // _PyOpcode_opcode_metadata, _PyOpcode_num_popped/pushed From cc611f8c99bbffd8427df7b2c331f47eb1e88bb0 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 25 Jun 2025 13:51:32 +0200 Subject: [PATCH 039/212] gh-131591: Make --without-remote-debug work (GH-135925) The feature is checked using `defined(Py_REMOTE_DEBUG)`; defining the macro (even as `0`) enables it. --- configure | 3 --- configure.ac | 2 -- 2 files changed, 5 deletions(-) diff --git a/configure b/configure index 8334e247c3e1bb..50223d81cd976e 100755 --- a/configure +++ b/configure @@ -29933,9 +29933,6 @@ printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } else - -printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } fi diff --git a/configure.ac b/configure.ac index 82fb2d8c492d48..f58e16c774f4aa 100644 --- a/configure.ac +++ b/configure.ac @@ -7168,8 +7168,6 @@ if test "$with_remote_debug" = yes; then [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([yes]) else - AC_DEFINE([Py_REMOTE_DEBUG], [0], - [Define if you want to enable remote debugging support.]) AC_MSG_RESULT([no]) fi From abb63982ace58cedca9b9da7e38d84b2a6a53577 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 25 Jun 2025 17:17:02 +0300 Subject: [PATCH 040/212] gh-135839: Fix `module_traverse` and `module_clear` in subinterp modules (#135937) --- Modules/_interpqueuesmodule.c | 6 ++---- Modules/_interpretersmodule.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index de06b8b41fe585..e5afe746f90bdc 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -1952,8 +1952,7 @@ static int module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1962,8 +1961,7 @@ module_clear(PyObject *mod) module_state *state = get_module_state(mod); // Now we clear the module state. - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index b920c32474f4e6..e7feaa7f186aee 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -1706,8 +1706,7 @@ module_traverse(PyObject *mod, visitproc visit, void *arg) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)traverse_module_state(state, visit, arg); - return 0; + return traverse_module_state(state, visit, arg); } static int @@ -1715,8 +1714,7 @@ module_clear(PyObject *mod) { module_state *state = get_module_state(mod); assert(state != NULL); - (void)clear_module_state(state); - return 0; + return clear_module_state(state); } static void From 2d752831ab06f1695547254be42de0c52957cb0c Mon Sep 17 00:00:00 2001 From: HarryLHW <123lhw321@gmail.com> Date: Wed, 25 Jun 2025 22:24:58 +0800 Subject: [PATCH 041/212] Docs: Add cross-reference for `positional_item` in the `calls` productionlist (GH-129977) Add missing hyperlink for `positional_item` --- Doc/reference/expressions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 429b3cd1f006a2..17f39aaf5f57cd 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1023,7 +1023,7 @@ series of :term:`arguments `: : ["," `keywords_arguments`] : | `starred_and_keywords` ["," `keywords_arguments`] : | `keywords_arguments` - positional_arguments: positional_item ("," positional_item)* + positional_arguments: `positional_item` ("," `positional_item`)* positional_item: `assignment_expression` | "*" `expression` starred_and_keywords: ("*" `expression` | `keyword_item`) : ("," "*" `expression` | "," `keyword_item`)* From d8d537a869c7fcf96c6a3d8f69137b1cc56760b2 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Wed, 25 Jun 2025 23:02:53 +0800 Subject: [PATCH 042/212] gh-135927: Fix MSVC Clatest C builds (GH-135935) --- Include/pymacro.h | 4 ++-- Include/pyport.h | 3 ++- .../next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst | 1 + Modules/mathmodule.c | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst diff --git a/Include/pymacro.h b/Include/pymacro.h index bfe660e8303d0f..b2886ddac5d17e 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -73,14 +73,14 @@ # else # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # endif +# elif defined(_MSC_VER) +# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L # define _Py_ALIGNED_DEF(N, T) alignas(N) alignas(T) T # elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # elif (defined(__GNUC__) || defined(__clang__)) # define _Py_ALIGNED_DEF(N, T) __attribute__((aligned(N))) T -# elif defined(_MSC_VER) -# define _Py_ALIGNED_DEF(N, T) __declspec(align(N)) T # else # define _Py_ALIGNED_DEF(N, T) _Alignas(N) _Alignas(T) T # endif diff --git a/Include/pyport.h b/Include/pyport.h index 3eac119bf8e8d8..73f071c41a6687 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,7 +49,8 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ +#if (defined(__GNUC__) || defined(__clang__)) && \ + (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ || (defined(__cplusplus) && __cplusplus >= 201103) # define _Py_NULL nullptr #else diff --git a/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst new file mode 100644 index 00000000000000..21a2c87d3448c1 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2025-06-25-13-27-14.gh-issue-135927.iCNPQc.rst @@ -0,0 +1 @@ +Fix building with MSVC when passing option ``/std:clatest``. diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index bbbb49115681de..1837de4735c176 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -3148,7 +3148,7 @@ static PyObject * math_issubnormal_impl(PyObject *module, double x) /*[clinic end generated code: output=4e76ac98ddcae761 input=9a20aba7107d0d95]*/ { -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#if !defined(_MSC_VER) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L return PyBool_FromLong(issubnormal(x)); #else return PyBool_FromLong(isfinite(x) && x && !isnormal(x)); From b7de590e52c8f03846338c702e702c7a2f8e4ca8 Mon Sep 17 00:00:00 2001 From: Joseph Tibbertsma Date: Wed, 25 Jun 2025 09:41:36 -0700 Subject: [PATCH 043/212] Fix needless spinning in `_PyMutex_LockTimed` with zero timeout (gh-135872) The free threading build could spin unnecessarily on `_Py_yield()` if the initial compare and swap failed. --- .../2025-06-23-18-08-32.gh-issue-135871.50C528.rst | 2 ++ Python/lock.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst new file mode 100644 index 00000000000000..ce29ddecefe17f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-23-18-08-32.gh-issue-135871.50C528.rst @@ -0,0 +1,2 @@ +Non-blocking mutex lock attempts now return immediately when the lock is busy +instead of briefly spinning in the :term:`free threading` build. diff --git a/Python/lock.c b/Python/lock.c index b125ad0c9e356d..ea6ac00bfeccb4 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -58,7 +58,7 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) return PY_LOCK_ACQUIRED; } } - else if (timeout == 0) { + if (timeout == 0) { return PY_LOCK_FAILURE; } From e613aeec3291b212a02806a942d16fbc08875e79 Mon Sep 17 00:00:00 2001 From: Rob Reynolds Date: Wed, 25 Jun 2025 11:40:00 -0600 Subject: [PATCH 044/212] Docs: Fix indentation in `slice` class of `functions.rst` (GH-134393) Paragraph should not be under `slice.step`. It applies to the whole class. --------- Co-authored-by: Rob Reynolds <13379223+reynoldsnlp@users.noreply.github.com> --- Doc/library/functions.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 2ecce3dba5a0b9..80bd1275973f8d 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1839,15 +1839,15 @@ are always available. They are listed here in alphabetical order. ``range(start, stop, step)``. The *start* and *step* arguments default to ``None``. + Slice objects have read-only data attributes :attr:`!start`, + :attr:`!stop`, and :attr:`!step` which merely return the argument + values (or their default). They have no other explicit functionality; + however, they are used by NumPy and other third-party packages. + .. attribute:: slice.start .. attribute:: slice.stop .. attribute:: slice.step - Slice objects have read-only data attributes :attr:`!start`, - :attr:`!stop`, and :attr:`!step` which merely return the argument - values (or their default). They have no other explicit functionality; - however, they are used by NumPy and other third-party packages. - Slice objects are also generated when extended indexing syntax is used. For example: ``a[start:stop:step]`` or ``a[start:stop, i]``. See :func:`itertools.islice` for an alternate version that returns an From 4f577c6670863bd99f5b221a51e670f679e059e7 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 04:01:25 +0100 Subject: [PATCH 045/212] gh-125142: remove duplicated import in `Lib/pydoc.py` (gh-135215) --- Lib/pydoc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7528178fdcae97..d508fb70ea429e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1812,7 +1812,6 @@ def writedocs(dir, pkgpath='', done=None): def _introdoc(): - import textwrap ver = '%d.%d' % sys.version_info[:2] if os.environ.get('PYTHON_BASIC_REPL'): pyrepl_keys = '' @@ -2170,7 +2169,6 @@ def showtopic(self, topic, more_xrefs=''): if more_xrefs: xrefs = (xrefs or '') + ' ' + more_xrefs if xrefs: - import textwrap text = 'Related help topics: ' + ', '.join(xrefs.split()) + '\n' wrapped_text = textwrap.wrap(text, 72) doc += '\n%s\n' % '\n'.join(wrapped_text) From 38ecbf9d4e03904ae7e1b37cea53df6bff145a1d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Jun 2025 20:03:24 -0700 Subject: [PATCH 046/212] Add whatsnew text for warnings module changes. (gh-135869) --- Doc/whatsnew/3.14.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index cbca720b75e96c..a74d414ae4bb70 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -978,6 +978,23 @@ be specified by the build backend, as it will no longer be determined automatically by the C compiler. For a running interpreter, the setting that was used at compile time can be found using :func:`sysconfig.get_config_var`. +A new flag has been added, :data:`~sys.flags.context_aware_warnings`. This +flag defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then the :class:`warnings.catch_warnings` context +manager uses a context variable for warning filters. This makes the context +manager behave predicably when used with multiple threads or asynchronous +tasks. + +A new flag has been added, :data:`~sys.flags.thread_inherit_context`. This flag +defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then threads created with :class:`threading.Thread` +start with a copy of the :class:`~contextvars.Context()` of the caller of +:meth:`~threading.Thread.start`. Most significantly, this makes the warning +filtering context established by :class:`~warnings.catch_warnings` be +"inherited" by threads (or asyncio tasks) started within that context. It also +affects other modules that use context variables, such as the :mod:`decimal` +context manager. + .. _whatsnew314-pyrepl-highlighting: @@ -1028,6 +1045,18 @@ Please report any bugs or major performance regressions that you encounter! .. seealso:: :pep:`744` +Concurrent safe warnings control +-------------------------------- + +The :class:`warnings.catch_warnings` context manager will now optionally +use a context variable for warning filters. This is enabled by setting +the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X`` +command-line option or an environment variable. This gives predicable +warnings control when using :class:`~warnings.catch_warnings` combined with +multiple threads or asynchronous tasks. The flag defaults to true for the +free-threaded build and false for the GIL-enabled build. + +(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.) Other language changes ====================== From cb689a3bb6e018306d152a2029dbe80f0287e701 Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Wed, 25 Jun 2025 23:44:08 -0400 Subject: [PATCH 047/212] gh-135956: Remove duplicate word in _pydatetime docstring (#135957) _pydatetime.isoformat docstring repeats 'giving'. --- Lib/_pydatetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 71f619024e570d..bc35823f70144e 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -2164,7 +2164,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. From 6bdea9ad6150b0e02f04c942640b0072e92435cc Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 09:25:41 +0200 Subject: [PATCH 048/212] gh-131591: Add Py_ prefix to MAX_SCRIPT_PATH_SIZE; remove unprefixed struct tag (GH-135924) Names/macros defined in public headers should have `Py`/`_Py` prefixes. --- Include/cpython/pystate.h | 6 +++--- Include/internal/pycore_debug_offsets.h | 2 +- Modules/posixmodule.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 54d7e62292966e..be582122118e44 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 /* Remote debugger support */ -#define MAX_SCRIPT_PATH_SIZE 512 -typedef struct _remote_debugger_support { +#define Py_MAX_SCRIPT_PATH_SIZE 512 +typedef struct { int32_t debugger_pending_call; - char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; + char debugger_script_path[Py_MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; typedef struct _err_stackitem { diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index ce3fcb109f49f7..1b59fa2ef60014 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -368,7 +368,7 @@ typedef struct _Py_DebugOffsets { .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ - .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + .debugger_script_path_size = Py_MAX_SCRIPT_PATH_SIZE, \ }, \ } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7dc5ef39a566e4..b570f81b7cf7c2 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -685,7 +685,8 @@ static void reset_remotedebug_data(PyThreadState *tstate) { tstate->remote_debugger_support.debugger_pending_call = 0; - memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE); + memset(tstate->remote_debugger_support.debugger_script_path, 0, + Py_MAX_SCRIPT_PATH_SIZE); } From 55e0336e774f4f2728c14cd78b818ded664bf7ab Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 26 Jun 2025 05:43:08 -0400 Subject: [PATCH 049/212] gh-135755: Move `PyFunction_GET_BUILTINS` to the private API (GH-135938) --- Include/cpython/funcobject.h | 5 ----- Include/internal/pycore_function.h | 5 +++++ Modules/_testinternalcapi.c | 5 +++-- Objects/funcobject.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 18249b95befe65..598cd330bc9ca9 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) -static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { - return _PyFunction_CAST(func)->func_builtins; -} -#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) - static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index a30d52d49bdc4d..6e1209659565a3 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -41,6 +41,11 @@ extern PyObject *_Py_set_function_type_params( PyAPI_FUNC(int) _PyFunction_VerifyStateless(PyThreadState *, PyObject *); +static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + #ifdef __cplusplus } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 804cb4e4d1c8ee..fdf22a0c994d3a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -21,6 +21,7 @@ #include "pycore_fileutils.h" // _Py_normpath() #include "pycore_flowgraph.h" // _PyCompile_OptimizeCfg() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_function.h" // _PyFunction_GET_BUILTINS #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_import.h" // _PyImport_ClearExtension() @@ -1022,7 +1023,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } @@ -1190,7 +1191,7 @@ verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index f87b0e5d8f1e47..f8dd10a346d613 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1256,7 +1256,7 @@ _PyFunction_VerifyStateless(PyThreadState *tstate, PyObject *func) return -1; } // Check the builtins. - PyObject *builtinsns = PyFunction_GET_BUILTINS(func); + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); if (builtinsns != NULL && !PyDict_Check(builtinsns)) { _PyErr_Format(tstate, PyExc_TypeError, "unsupported builtins %R", builtinsns); From 5f3f5f97fd58d71223aa02bac31ce0a3021b0c06 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 11:48:37 +0200 Subject: [PATCH 050/212] gh-125206: Make _Py_FFI_SUPPORT_C_COMPLEX private (GH-135932) --- Modules/_ctypes/_ctypes_test.c | 4 ++-- Modules/_ctypes/cfield.c | 4 ++-- Modules/_ctypes/ctypes.h | 2 +- configure | 2 +- configure.ac | 2 +- pyconfig.h.in | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index d28e5708b44933..66338805007853 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -23,7 +23,7 @@ # define _Py_thread_local __thread #endif -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # include // csqrt() # undef I // for _ctypes_test_generated.c.h #endif @@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a) return sqrt(a); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) EXPORT(double complex) my_csqrt(double complex a) { return csqrt(a); diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 163b92642615e5..547e2471a1cbc0 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size) return PyFloat_FromDouble(val); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) /* We don't use _Complex types here, using arrays instead, as the C11+ standard says: "Each complex type has the same representation and alignment @@ -1599,7 +1599,7 @@ for base_code, base_c_type in [ /////////////////////////////////////////////////////////////////////////// TABLE_ENTRY_SW(d, &ffi_type_double); -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) if (Py_FFI_COMPLEX_AVAILABLE) { TABLE_ENTRY(D, &ffi_type_complex_double); TABLE_ENTRY(F, &ffi_type_complex_float); diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 6a45c11e61af5c..5b4f97d43b8721 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -23,7 +23,7 @@ // Do we support C99 complex types in ffi? // For Apple's libffi, this must be determined at runtime (see gh-128156). -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # if USING_APPLE_OS_LIBFFI && defined(__has_builtin) # if __has_builtin(__builtin_available) # define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *) diff --git a/configure b/configure index 50223d81cd976e..43b36d9231e341 100755 --- a/configure +++ b/configure @@ -15742,7 +15742,7 @@ fi printf "%s\n" "$ac_cv_ffi_complex_double_supported" >&6; } if test "$ac_cv_ffi_complex_double_supported" = "yes"; then -printf "%s\n" "#define Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h +printf "%s\n" "#define _Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h fi diff --git a/configure.ac b/configure.ac index f58e16c774f4aa..e77696e3a4e025 100644 --- a/configure.ac +++ b/configure.ac @@ -4163,7 +4163,7 @@ int main(void) [ac_cv_ffi_complex_double_supported=no]) ])]) if test "$ac_cv_ffi_complex_double_supported" = "yes"; then - AC_DEFINE([Py_FFI_SUPPORT_C_COMPLEX], [1], + AC_DEFINE([_Py_FFI_SUPPORT_C_COMPLEX], [1], [Defined if _Complex C type can be used with libffi.]) fi diff --git a/pyconfig.h.in b/pyconfig.h.in index 65a2c55217c258..d4f1da7fb10776 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1736,9 +1736,6 @@ /* Defined if Python is built as a shared library. */ #undef Py_ENABLE_SHARED -/* Defined if _Complex C type can be used with libffi. */ -#undef Py_FFI_SUPPORT_C_COMPLEX - /* Define if you want to disable the GIL */ #undef Py_GIL_DISABLED @@ -2026,6 +2023,9 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Defined if _Complex C type can be used with libffi. */ +#undef _Py_FFI_SUPPORT_C_COMPLEX + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 8f8e6dfc42705a65780034f02111afc16df0429a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 13:05:01 +0200 Subject: [PATCH 051/212] gh-135755: Use private names (_Py*) for header file guards new in 3.14 (GH-135921) These are private API; let's name new ones accordingly. --- Include/audit.h | 10 +++++----- Include/cpython/audit.h | 2 +- Include/refcount.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Include/audit.h b/Include/audit.h index 793b7077e1027b..9be54ad4411096 100644 --- a/Include/audit.h +++ b/Include/audit.h @@ -1,5 +1,5 @@ -#ifndef Py_AUDIT_H -#define Py_AUDIT_H +#ifndef _Py_AUDIT_H +#define _Py_AUDIT_H #ifdef __cplusplus extern "C" { #endif @@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple( #ifndef Py_LIMITED_API -# define Py_CPYTHON_AUDIT_H +# define _Py_CPYTHON_AUDIT_H # include "cpython/audit.h" -# undef Py_CPYTHON_AUDIT_H +# undef _Py_CPYTHON_AUDIT_H #endif #ifdef __cplusplus } #endif -#endif /* !Py_AUDIT_H */ +#endif /* !_Py_AUDIT_H */ diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h index 3c5c7a8c06091d..536f9248632097 100644 --- a/Include/cpython/audit.h +++ b/Include/cpython/audit.h @@ -1,4 +1,4 @@ -#ifndef Py_CPYTHON_AUDIT_H +#ifndef _Py_CPYTHON_AUDIT_H # error "this header file must not be included directly" #endif diff --git a/Include/refcount.h b/Include/refcount.h index ebd1dba6d15e1a..034c453f449f5b 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -1,5 +1,5 @@ -#ifndef Py_REFCOUNT_H -#define Py_REFCOUNT_H +#ifndef _Py_REFCOUNT_H +#define _Py_REFCOUNT_H #ifdef __cplusplus extern "C" { #endif @@ -561,4 +561,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #ifdef __cplusplus } #endif -#endif // !Py_REFCOUNT_H +#endif // !_Py_REFCOUNT_H From 807d99d0bae0dc670da52e24321b2b989632fc2e Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:41:41 +0800 Subject: [PATCH 052/212] gh-135965: Delete duplicate word in isolating-extensions howto (#135964) Change use use to use. --- Doc/howto/isolating-extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index b2109b1503992b..fbc426ba1d7d9a 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -453,7 +453,7 @@ Avoiding ``PyObject_New`` GC-tracked objects need to be allocated using GC-aware functions. -If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: From 11ce5c482afaf4af226ef89efb1a917aacc57b1a Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 26 Jun 2025 08:02:50 -0500 Subject: [PATCH 053/212] gh-129958: New syntax error in format spec applies to both f-strings and t-strings (#135570) Co-authored-by: Tomas R. Co-authored-by: Lysandros Nikolaou --- Lib/test/test_tstring.py | 1 + .../2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst | 2 ++ Parser/lexer/lexer.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst diff --git a/Lib/test/test_tstring.py b/Lib/test/test_tstring.py index e72a1ea54176d5..aabae38556735b 100644 --- a/Lib/test/test_tstring.py +++ b/Lib/test/test_tstring.py @@ -219,6 +219,7 @@ def test_syntax_errors(self): ("t'{lambda:1}'", "t-string: lambda expressions are not allowed " "without parentheses"), ("t'{x:{;}}'", "t-string: expecting a valid expression after '{'"), + ("t'{1:d\n}'", "t-string: newlines are not allowed in format specifiers") ): with self.subTest(case), self.assertRaisesRegex(SyntaxError, err): eval(case) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst new file mode 100644 index 00000000000000..70b3e99425df14 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst @@ -0,0 +1,2 @@ +Differentiate between t-strings and f-strings in syntax error for newlines +in format specifiers of single-quoted interpolated strings. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..0a078dd594148c 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1421,7 +1421,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } From 9987678ac1be3f10d8a6991c00a83c55e3fb0a2d Mon Sep 17 00:00:00 2001 From: Connor Denihan <188690869+cdenihan@users.noreply.github.com> Date: Thu, 26 Jun 2025 09:27:25 -0400 Subject: [PATCH 054/212] gh-135110: Fix misleading `generator.close()` documentation (GH-135152) The documentation incorrectly stated that generator.close() 'raises' a GeneratorExit exception. This was misleading because the method doesn't raise the exception to the caller - it sends the exception internally to the generator and returns None. --- Doc/howto/functional.rst | 2 +- Doc/reference/expressions.rst | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index b4f3463afee812..78e56e0c64fb10 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -602,7 +602,7 @@ generators: raise an exception inside the generator; the exception is raised by the ``yield`` expression where the generator's execution is paused. -* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the +* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the generator to terminate the iteration. On receiving this exception, the generator's code must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the exception and doing anything else is diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 17f39aaf5f57cd..24544a055c3ed2 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -625,8 +625,10 @@ is already executing raises a :exc:`ValueError` exception. .. method:: generator.close() - Raises a :exc:`GeneratorExit` at the point where the generator function was - paused. If the generator function catches the exception and returns a + Raises a :exc:`GeneratorExit` exception at the point where the generator + function was paused (equivalent to calling ``throw(GeneratorExit)``). + The exception is raised by the yield expression where the generator was paused. + If the generator function catches the exception and returns a value, this value is returned from :meth:`close`. If the generator function is already closed, or raises :exc:`GeneratorExit` (by not catching the exception), :meth:`close` returns :const:`None`. If the generator yields a From 48e72c0cb65db684d4efd9dd75de3735d5e65bb2 Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 27 Jun 2025 02:18:32 +1200 Subject: [PATCH 055/212] gh-91555: add warning to docs about possibility of deadlock/infinite recursion (GH-135954) * gh-91555: add warning to docs about possibility of deadlock/infinite recursion Attempt to clarify in the documentation that care must be taken when using multiprocessing classes to implement logging since they have builtin internal logging, and hence may cause deadlock/infinite recursion. * Update Doc/library/logging.handlers.rst Co-authored-by: Vinay Sajip * Change whitespace. --------- Co-authored-by: Vinay Sajip --- Doc/library/logging.handlers.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 8f3aa1dfdd0cde..d74ef73ee28497 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -1059,6 +1059,15 @@ possible, while any potentially slow operations (such as sending an email via .. note:: If you are using :mod:`multiprocessing`, you should avoid using :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`. + .. warning:: + + The :mod:`multiprocessing` module uses an internal logger created and + accessed via :meth:`~multiprocessing.get_logger`. + :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon + items being queued. If those log messages are processed by a + :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance, + it will cause a deadlock or infinite recursion. + .. method:: emit(record) Enqueues the result of preparing the LogRecord. Should an exception From 36de3effaeb69f4ef3f84e4672a6b2495f2298a9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 26 Jun 2025 18:11:49 +0200 Subject: [PATCH 056/212] gh-135927: Check _MSC_VER to define _Py_NULL macro (#135987) --- Include/pyport.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 73f071c41a6687..0675294d5bc3b1 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,9 +49,9 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. -#if (defined(__GNUC__) || defined(__clang__)) && \ - (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ - || (defined(__cplusplus) && __cplusplus >= 201103) +#if !defined(_MSC_VER) && \ + ((defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ + || (defined(__cplusplus) && __cplusplus >= 201103)) # define _Py_NULL nullptr #else # define _Py_NULL NULL From e51e7857fda75d0a5d950cb03f8254f53e9bff16 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:20:07 +0100 Subject: [PATCH 057/212] IDLE: Update NEWS2x.txt with 2.7.0 release date (#129908) --- Lib/idlelib/NEWS2x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/idlelib/NEWS2x.txt b/Lib/idlelib/NEWS2x.txt index 6751ca5f111b18..3721193007e59b 100644 --- a/Lib/idlelib/NEWS2x.txt +++ b/Lib/idlelib/NEWS2x.txt @@ -1,6 +1,6 @@ What's New in IDLE 2.7? (Merged into 3.1 before 2.7 release.) ======================= -*Release date: XX-XXX-2010* +*Release date: 03-Jul-2010* - idle.py modified and simplified to better support developing experimental versions of IDLE which are not installed in the standard location. From d6f50a23a39ac704656dedf57907fcebc31099f3 Mon Sep 17 00:00:00 2001 From: Nathan Korth Date: Thu, 26 Jun 2025 18:35:45 -0400 Subject: [PATCH 058/212] gh-135995: Fix missing char in palmos encoding (#135990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x8b correctly encodes to ‹, but 0x9b was mistakenly marked as a control character instead of ›. --------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brian Schubert Co-authored-by: Terry Jan Reedy --- Lib/encodings/palmos.py | 2 +- .../next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523496..df164ca5b9549c 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst new file mode 100644 index 00000000000000..998b3cd85b1d5d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst @@ -0,0 +1 @@ +In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE RIGHT-POINTING ANGLE QUOTATION MARK). From 6d0ec7e37c37f061ade3dcd1767533ec3b2280cc Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Thu, 26 Jun 2025 20:00:19 -0400 Subject: [PATCH 059/212] Docs: Fix duplicate word typos (GH-135958) --- Doc/c-api/init.rst | 2 +- Doc/c-api/long.rst | 2 +- Doc/extending/newtypes_tutorial.rst | 2 +- Doc/library/ctypes.rst | 2 +- Doc/library/email.header.rst | 4 ++-- Doc/library/exceptions.rst | 2 +- Doc/library/faulthandler.rst | 2 +- Doc/library/mmap.rst | 2 +- Doc/library/pathlib.rst | 2 +- Doc/library/socketserver.rst | 2 +- Doc/library/threading.rst | 2 +- Doc/using/cmdline.rst | 4 ++-- Doc/whatsnew/3.13.rst | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 3106bf9808f254..41fd4ea14ef12f 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1250,7 +1250,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp) Reset all information in an interpreter state object. There must be - an :term:`attached thread state` for the the interpreter. + an :term:`attached thread state` for the interpreter. .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 25d9e62e387279..2d0bda76697e81 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -439,7 +439,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. All *n_bytes* of the buffer are written: large buffers are padded with zeroes. - If the returned value is greater than than *n_bytes*, the value was + If the returned value is greater than *n_bytes*, the value was truncated: as many of the lowest bits of the value as could fit are written, and the higher bits are ignored. This matches the typical behavior of a C-style downcast. diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index f14690de4f86e8..3bbee33bd50698 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -277,7 +277,7 @@ be an instance of a subclass. The explicit cast to ``CustomObject *`` above is needed because we defined ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc`` function pointer expects to receive a ``PyObject *`` argument. - By assigning to the the ``tp_dealloc`` slot of a type, we declare + By assigning to the ``tp_dealloc`` slot of a type, we declare that it can only be called with instances of our ``CustomObject`` class, so the cast to ``(CustomObject *)`` is safe. This is object-oriented polymorphism, in C! diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index e00fe9c8145f12..846cece3761858 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -2965,7 +2965,7 @@ fields, or any other data types containing pointer type fields. .. attribute:: is_anonymous True if this field is anonymous, that is, it contains nested sub-fields - that should be be merged into a containing structure or union. + that should be merged into a containing structure or union. .. _ctypes-arrays-pointers: diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index c3392a62b8ee79..f49885b8785235 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -206,7 +206,7 @@ The :mod:`email.header` module also provides the following convenient functions. .. note:: - This function exists for for backwards compatibility only. For + This function exists for backwards compatibility only. For new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. @@ -225,5 +225,5 @@ The :mod:`email.header` module also provides the following convenient functions. .. note:: - This function exists for for backwards compatibility only, and is + This function exists for backwards compatibility only, and is not recommended for use in new code. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index bb72032891ea98..9806ae80905ca0 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -1048,7 +1048,7 @@ their subgroups based on the types of the contained exceptions. subclasses that need a different constructor signature need to override that rather than :meth:`~object.__init__`. For example, the following defines an exception group subclass which accepts an exit_code and - and constructs the group's message from it. :: + constructs the group's message from it. :: class Errors(ExceptionGroup): def __new__(cls, errors, exit_code): diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 5058b85bffb15c..1977f4d3ba3916 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -90,7 +90,7 @@ An error will be printed instead of the stack. Additionally, some compilers do not support :term:`CPython's ` implementation of C stack dumps. As a result, a different error may be printed -instead of the stack, even if the the operating system supports dumping stacks. +instead of the stack, even if the operating system supports dumping stacks. .. note:: diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index 4e20c07331a220..8fca79b23e4e15 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -269,7 +269,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Resizing a map created with *access* of :const:`ACCESS_READ` or :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception. - Resizing a map created with with *trackfd* set to ``False``, + Resizing a map created with *trackfd* set to ``False``, will raise a :exc:`ValueError` exception. **On Windows**: Resizing the map will raise an :exc:`OSError` if there are other diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 86351e65dc4ed6..47986a2d9602ee 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1985,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking. If *follow_symlinks* is ``False``, return ``True`` only if the path is a file (without following symlinks); return ``False`` if the path - is a directory or other other non-file, or if it doesn't exist. + is a directory or other non-file, or if it doesn't exist. .. method:: is_symlink() diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 753f12460b824b..7fb629f7d2f256 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -543,7 +543,7 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -the first handler had to use a ``recv()`` loop to accumulate data until a +first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. TCP is stream based: data arrives in the order it was sent, but there no diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 52fefd590daf18..cabb41442f8419 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -621,7 +621,7 @@ since it is impossible to detect the termination of alien threads. an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. - If an attempt is made to join a running daemonic thread in in late stages + If an attempt is made to join a running daemonic thread in late stages of :term:`Python finalization ` :meth:`!join` raises a :exc:`PythonFinalizationError`. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a5867b489e0053..cad49e2deeb46f 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -653,7 +653,7 @@ Miscellaneous options .. versionadded:: 3.13 * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread` - to, by default, use a copy of context of of the caller of + to, by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, threads will start with an empty context. If unset, the value of this option defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also @@ -1284,7 +1284,7 @@ conflict. .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT If this variable is set to ``1`` then :class:`~threading.Thread` will, - by default, use a copy of context of of the caller of ``Thread.start()`` + by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, new threads will start with an empty context. If unset, this variable defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also :option:`-X thread_inherit_context<-X>`. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 580a3d8154dee1..ef7c36d8539dfd 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1996,7 +1996,7 @@ New Deprecations (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.) * Deprecate the :func:`typing.no_type_check_decorator` decorator function, - to be removed in in Python 3.15. + to be removed in Python 3.15. After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. (Contributed by Alex Waygood in :gh:`106309`.) From ce39ffc0c741288bd32ef8d78d8435eb7a87b992 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:46:49 +0800 Subject: [PATCH 060/212] gh-135966: Modify iOS testbed to make app_packages a site directory (#135967) The iOS testbed now treats the app_packages folder as a site folder. This ensures it is on the path, but also ensures any .pth files are processed on app startup. --- Doc/using/ios.rst | 11 ++-- ...-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst | 1 + iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 51 +++++++++++++++---- 3 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 7d5c6331bef5ce..0fb28f8c866b02 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -298,9 +298,9 @@ To add Python to an iOS Xcode project: * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* (optional, but strongly recommended; this is enabled by default); - * ``PYTHONHOME`` for the interpreter is configured to point at the + * :envvar:`PYTHONHOME` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and - * The ``PYTHONPATH`` for the interpreter includes: + * The :envvar:`PYTHONPATH` for the interpreter includes: - the ``python/lib/python3.X`` subfolder of your app's bundle, - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and @@ -324,7 +324,12 @@ modules in your app, some additional steps will be required: the ``lib-dynload`` folder can be copied and adapted for this purpose. * If you're using a separate folder for third-party packages, ensure that folder - is included as part of the ``PYTHONPATH`` configuration in step 10. + is included as part of the :envvar:`PYTHONPATH` configuration in step 10. + +* If any of the folders that contain third-party packages will contain ``.pth`` + files, you should add that folder as a *site directory* (using + :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or + :attr:`sys.path` directly. Testing a Python package ------------------------ diff --git a/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst new file mode 100644 index 00000000000000..8dc007431f3919 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst @@ -0,0 +1 @@ +The iOS testbed now handles the ``app_packages`` folder as a site directory. diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index dd6e76f9496fe0..b502a6eb277b0b 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -15,6 +15,11 @@ - (void)testPython { PyStatus status; PyPreConfig preconfig; PyConfig config; + PyObject *app_packages_path; + PyObject *method_args; + PyObject *result; + PyObject *site_module; + PyObject *site_addsitedir_attr; PyObject *sys_module; PyObject *sys_path_attr; NSArray *test_args; @@ -111,29 +116,55 @@ - (void)testPython { return; } - sys_module = PyImport_ImportModule("sys"); - if (sys_module == NULL) { - XCTFail(@"Could not import sys module"); + // Add app_packages as a site directory. This both adds to sys.path, + // and ensures that any .pth files in that directory will be executed. + site_module = PyImport_ImportModule("site"); + if (site_module == NULL) { + XCTFail(@"Could not import site module"); return; } - sys_path_attr = PyObject_GetAttrString(sys_module, "path"); - if (sys_path_attr == NULL) { - XCTFail(@"Could not access sys.path"); + site_addsitedir_attr = PyObject_GetAttrString(site_module, "addsitedir"); + if (site_addsitedir_attr == NULL || !PyCallable_Check(site_addsitedir_attr)) { + XCTFail(@"Could not access site.addsitedir"); return; } - // Add the app packages path path = [NSString stringWithFormat:@"%@/app_packages", resourcePath, nil]; NSLog(@"App packages path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); - failed = PyList_Insert(sys_path_attr, 0, PyUnicode_FromString([path UTF8String])); - if (failed) { - XCTFail(@"Unable to add app packages to sys.path"); + app_packages_path = PyUnicode_FromWideChar(wtmp_str, wcslen(wtmp_str)); + if (app_packages_path == NULL) { + XCTFail(@"Could not convert app_packages path to unicode"); return; } PyMem_RawFree(wtmp_str); + method_args = Py_BuildValue("(O)", app_packages_path); + if (method_args == NULL) { + XCTFail(@"Could not create arguments for site.addsitedir"); + return; + } + + result = PyObject_CallObject(site_addsitedir_attr, method_args); + if (result == NULL) { + XCTFail(@"Could not add app_packages directory using site.addsitedir"); + return; + } + + // Add test code to sys.path + sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + XCTFail(@"Could not import sys module"); + return; + } + + sys_path_attr = PyObject_GetAttrString(sys_module, "path"); + if (sys_path_attr == NULL) { + XCTFail(@"Could not access sys.path"); + return; + } + path = [NSString stringWithFormat:@"%@/app", resourcePath, nil]; NSLog(@"App path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); From b87d40c174bae98e445de78453e27eec9cdcbef4 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 26 Jun 2025 23:48:05 -0500 Subject: [PATCH 061/212] gh-135551: Change how sorting picks minimum run length (#135553) New scheme from Stefan Pochmann for picking minimum run lengths. By allowing them to change a little from one run to the next, it's possible to arrange for that all merges, at all levels, strongly tend to be as evenly balanced as possible, for randomly ordered data. Meaning the number of initial runs is a power of 2, and all merges involve runs whose lengths differ by no more than 1. --- Misc/ACKS | 1 + ...-06-16-03-56-15.gh-issue-135551.hRTQO-.rst | 1 + Objects/listobject.c | 48 +++-- Objects/listsort.txt | 175 ++++++++++++++++-- 4 files changed, 184 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst diff --git a/Misc/ACKS b/Misc/ACKS index 74cf29cdbc552f..6ab50763feadd9 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1481,6 +1481,7 @@ Jean-François Piéronne Oleg Plakhotnyuk Anatoliy Platonov Marcel Plch +Stefan Pochmann Kirill Podoprigora Remi Pointel Jon Poler diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst new file mode 100644 index 00000000000000..22dda2a3e972a8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst @@ -0,0 +1 @@ +Sorting randomly ordered lists will often run a bit faster, thanks to a new scheme for picking minimum run lengths from Stefan Pochmann, which arranges for the merge tree to be as evenly balanced as is possible. diff --git a/Objects/listobject.c b/Objects/listobject.c index 23d3472b6d4153..1b36f4c25abf4d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1685,10 +1685,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) /* Avoid malloc for small temp arrays. */ #define MERGESTATE_TEMP_SIZE 256 -/* The largest value of minrun. This must be a power of 2, and >= 1, so that - * the compute_minrun() algorithm guarantees to return a result no larger than - * this, - */ +/* The largest value of minrun. This must be a power of 2, and >= 1 */ #define MAX_MINRUN 64 #if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1)) #error "MAX_MINRUN must be a power of 2, and >= 1" @@ -1749,6 +1746,11 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Varisbles used for minrun computation. The "ideal" minrun length is + * the infinite precision listlen / 2**e. See listsort.txt. + */ + Py_ssize_t mr_current, mr_e, mr_mask; }; /* binarysort is the best method for sorting small arrays: it does few @@ -2210,6 +2212,14 @@ merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc, ms->min_gallop = MIN_GALLOP; ms->listlen = list_size; ms->basekeys = lo->keys; + + /* State for generating minrun values. See listsort.txt. */ + ms->mr_e = 0; + while (list_size >> ms->mr_e >= MAX_MINRUN) { + ++ms->mr_e; + } + ms->mr_mask = (1 << ms->mr_e) - 1; + ms->mr_current = 0; } /* Free all the temp memory owned by the MergeState. This must be called @@ -2687,27 +2697,15 @@ merge_force_collapse(MergeState *ms) return 0; } -/* Compute a good value for the minimum run length; natural runs shorter - * than this are boosted artificially via binary insertion. - * - * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MAX_MINRUN / 2. - * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is - * close to, but strictly less than, an exact power of 2. - * - * See listsort.txt for more info. - */ -static Py_ssize_t -merge_compute_minrun(Py_ssize_t n) +/* Return the next minrun value to use. See listsort.txt. */ +Py_LOCAL_INLINE(Py_ssize_t) +minrun_next(MergeState *ms) { - Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ - - assert(n >= 0); - while (n >= MAX_MINRUN) { - r |= n & 1; - n >>= 1; - } - return n + r; + ms->mr_current += ms->listlen; + assert(ms->mr_current >= 0); /* no overflow */ + Py_ssize_t result = ms->mr_current >> ms->mr_e; + ms->mr_current &= ms->mr_mask; + return result; } /* Here we define custom comparison functions to optimize for the cases one commonly @@ -3075,7 +3073,6 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ - minrun = merge_compute_minrun(nremaining); do { Py_ssize_t n; @@ -3084,6 +3081,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < 0) goto fail; /* If short, extend to min(minrun, nremaining). */ + minrun = minrun_next(&ms); if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; diff --git a/Objects/listsort.txt b/Objects/listsort.txt index f387d9c116e502..5b2fc7d50a25ca 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -270,8 +270,8 @@ result. This has two primary good effects: Computing minrun ---------------- -If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the -whole array then; it's hard to beat that given the overheads of trying +If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the +whole array then; it's hard to beat that given the overheads of trying something fancier (see note BINSORT). When N is a power of 2, testing on random data showed that minrun values of @@ -288,7 +288,6 @@ that 32 isn't a good choice for the general case! Consider N=2112: >>> divmod(2112, 32) (66, 0) ->>> If the data is randomly ordered, we're very likely to end up with 66 runs each of length 32. The first 64 of these trigger a sequence of perfectly @@ -301,22 +300,94 @@ to get 64 elements into place). If we take minrun=33 in this case, then we're very likely to end up with 64 runs each of length 33, and then all merges are perfectly balanced. Better! -What we want to avoid is picking minrun such that in +The original code used a cheap heuristic to pick a minrun that avoided the +very worst cases of imbalance for the final merge, but "pretty bad" cases +still existed. - q, r = divmod(N, minrun) +In 2025, Stefan Pochmann found a much better approach, based on letting minrun +vary a bit from one run to the next. Under his scheme, at _all_ levels of the +merge tree: -q is a power of 2 and r>0 (then the last merge only gets r elements into -place, and r < minrun is small compared to N), or q a little larger than a -power of 2 regardless of r (then we've got a case similar to "2112", again -leaving too little work for the last merge to do). +- The number of runs is a power of 2. +- At most two different run lengths appear. +- When two do appear, the smaller is one less than the larger. +- The lengths of run pairs merged never differ by more than one. -Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that -N/minrun is exactly a power of 2, or if that isn't possible, is close to, but -strictly less than, a power of 2. This is easier to do than it may sound: -take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining -bits are set. In fact, that rule covers every case in this section, including -small N and exact powers of 2; merge_compute_minrun() is a deceptively simple -function. +So, in all respects, as perfectly balanced as possible. + +For the 2112 case, that also keeps minrun at 33, but we were lucky there +that 2112 is 33 times a power of 2. The new approach doesn't rely on luck. + +For example, with 315 random elements, the old scheme uses fixed minrun=40 and +produces runs of length 40, except for the last. The new scheme produces a +mix of lengths 39 and 40: + +old: 40 40 40 40 40 40 40 35 +new: 39 39 40 39 39 40 39 40 + +Both schemes produce eight runs, a power of 2. That's good for a balanced +merge tree. But the new scheme allows merges where left and right length +never differ by more than 1: + +39 39 40 39 39 40 39 40 + 78 79 79 79 + 157 158 + 315 + +(This shows merges downward, e.g., two runs of length 39 are merged and +become a run of length 78.) + +With larger lists, the old scheme can get even more unbalanced. For example, +with 32769 elements (that's 2**15 + 1), it uses minrun=33 and produces 993 +runs (of length 33). That's not even a power of 2. The new scheme instead +produces 1024 runs, all with length 32 except for the last one with length 33. + +How does it work? Ideally, all runs would be exactly equally long. For the +above example, each run would have 315/8 = 39.375 elements. Which of course +doesn't work. But we can get close: + +For the first run, we'd like 39.375 elements. Since that's impossible, we +instead use 39 (the floor) and remember the current leftover fraction 0.375. +For the second run, we add 0.375 + 39.375 = 39.75. Again impossible, so we +instead use 39 and remember 0.75. For the third run, we add 0.75 + 39.375 = +40.125. This time we get 40 and remember 0.125. And so on. Here's a Python +generator doing that: + +def gen_minruns_with_floats(n): + mr = n + while mr >= MAX_MINRUN: + mr /= 2 + + mr_current = 0 + while True: + mr_current += mr + yield int(mr_current) + mr_current %= 1 + +But while all arithmetic here can be done exactly using binery floating point, +floats have less precision that a Py_ssize_t, and mixing floats with ints is +needlessly expensive anyway. + +So here's an integer version, where the internal numbers are scaled up by +2**e, or rather not divided by 2**e. Instead, only each yielded minrun gets +divided (by right-shifting). For example instead of adding 39.375 and +reducing modulo 1, it just adds 315 and reduces modulo 8. And always divides +by 8 to get each actual minrun value: + +def gen_minruns_simpler(n): + e = 0 + while (n >> e) >= MAX_MINRUN: + e += 1 + mask = (1 << e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> e + mr_current &= mask + +See note MINRUN CODE for a full implementation and a driver that exhaustively +verifies the claims above for all list lengths through 2 million. The Merge Pattern @@ -820,3 +891,75 @@ partially mitigated by pre-scanning the data to determine whether the data is homogeneous with respect to type. If so, it is sometimes possible to substitute faster type-specific comparisons for the slower, generic PyObject_RichCompareBool. + +MINRUN CODE +from itertools import accumulate +try: + from itertools import batched +except ImportError: + from itertools import islice + def batched(xs, k): + it = iter(xs) + while chunk := tuple(islice(it, k)): + yield chunk + +MAX_MINRUN = 64 + +def gen_minruns(n): + # In listobject.c, initialization is done in merge_init(), and + # the body of the loop in minrun_next(). + mr_e = 0 + while (n >> mr_e) >= MAX_MINRUN: + mr_e += 1 + mr_mask = (1 << mr_e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> mr_e + mr_current &= mr_mask + +def chew(n, show=False): + if n < 1: + return + + sizes = [] + tot = 0 + for size in gen_minruns(n): + sizes.append(size) + tot += size + if tot >= n: + break + assert tot == n + print(n, len(sizes)) + + small, large = MAX_MINRUN // 2, MAX_MINRUN + while len(sizes) > 1: + assert not len(sizes) & 1 + assert len(sizes).bit_count() == 1 # i.e., power of 2 + assert sum(sizes) == n + assert min(sizes) >= min(n, small) + assert max(sizes) <= large + + d = set(sizes) + assert len(d) <= 2 + if len(d) == 2: + lo, hi = sorted(d) + assert lo + 1 == hi + + mr = n / len(sizes) + for i, s in enumerate(accumulate(sizes, initial=0)): + assert int(mr * i) == s + + newsizes = [] + for a, b in batched(sizes, 2): + assert abs(a - b) <= 1 + newsizes.append(a + b) + sizes = newsizes + smsll = large + large *= 2 + + assert sizes[0] == n + +for n in range(2_000_001): + chew(n) \ No newline at end of file From 1dc7ed89af5328c9dfdcfdfebaa50283e3c38b44 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:58:20 +0800 Subject: [PATCH 062/212] gh-135968: Add iOS binary stubs for strip (#135970) Adds iOS binary stubs for invoking `strip` --- .../Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst | 1 + iOS/Resources/bin/arm64-apple-ios-simulator-strip | 2 ++ iOS/Resources/bin/arm64-apple-ios-strip | 2 ++ iOS/Resources/bin/x86_64-apple-ios-simulator-strip | 2 ++ 4 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst create mode 100755 iOS/Resources/bin/arm64-apple-ios-simulator-strip create mode 100755 iOS/Resources/bin/arm64-apple-ios-strip create mode 100755 iOS/Resources/bin/x86_64-apple-ios-simulator-strip diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst new file mode 100644 index 00000000000000..1c0b3825c71c1d --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst @@ -0,0 +1 @@ +Stubs for ``strip`` are now provided as part of an iOS install. diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/iOS/Resources/bin/arm64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..fd59d309b73a20 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-strip b/iOS/Resources/bin/arm64-apple-ios-strip new file mode 100755 index 00000000000000..75e823a3d02d61 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..c5cfb28929195a --- /dev/null +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@" From 91edb7480e011c30ce6540b96361efb2373d1994 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 10:50:59 +0300 Subject: [PATCH 063/212] gh-53203: Fix strptime() for %c, %x and %X formats on some locales (#135971) * Add detection of decimal non-ASCII alt digits. * Add support of non-decimal alt digits on locale lzh_TW. * Accept only numbers in correct range if alt digits are known. * Fix bug in detecting the position of the week day name on locales byn_ER and wal_ET. * Fix support of single-digit hour on locales ar_SA and bg_BG. * Add support for %T, %R, %r, %C, %OC. * Prepare code to use nl_langinfo(). --- Lib/_strptime.py | 190 ++++++++++++++---- Lib/test/test_strptime.py | 28 ++- ...5-06-26-11-52-40.gh-issue-53203.TMigBr.rst | 2 + 3 files changed, 164 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index ae67949626d460..7ac6f36360cb69 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -41,6 +42,21 @@ def _findall(haystack, needle): yield i i += len(needle) + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -84,6 +100,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -119,9 +136,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -129,26 +180,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -172,9 +229,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. - current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -312,16 +373,49 @@ def __init__(self, locale_time=None): for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -337,8 +431,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -365,7 +460,7 @@ def repl(m): nonlocal day_of_month_in_format day_of_month_in_format = True return self[format_char] - format = re_sub(r'%([OE]?\\?.?)', repl, format) + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -467,6 +562,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -474,30 +578,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -513,9 +621,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 268230f6da78f8..e52c46f8c58cce 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -221,14 +221,16 @@ def test_ValueError(self): self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d", format="%A") for bad_format in ("%", "% ", "%\n"): - with self.assertRaisesRegex(ValueError, "stray % in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, "stray % in format ")): _strptime._strptime_time("2005", bad_format) - for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ", - "%.", "%+", "%_", "%~", "%\\", + for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ", + "%.", "%+", "%~", "%\\", "%O.", "%O+", "%O_", "%O~", "%O\\"): directive = bad_format[1:].rstrip() - with self.assertRaisesRegex(ValueError, - f"'{re.escape(directive)}' is a bad directive in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, + f"'{re.escape(directive)}' is a bad directive in format ")): _strptime._strptime_time("2005", bad_format) msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \ @@ -480,13 +482,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # - # BUG: Generates regexp that does not match the current date and time - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', - 'my_MM', 'or_IN', 'shn_MM', 'az_IR') + 'my_MM', 'or_IN', 'shn_MM', 'az_IR', + 'byn_ER', 'wal_ET', 'lzh_TW') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -525,11 +525,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # BUG: Generates regexp that does not match the current date - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', - 'az_IR', 'my_MM', 'or_IN', 'shn_MM') + 'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW') def test_date_locale(self): # Test %x directive now = time.time() @@ -546,7 +544,7 @@ def test_date_locale(self): # NB: Dates before 1969 do not roundtrip on many locales, including C. @unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390") @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW') def test_date_locale2(self): # Test %x directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -562,11 +560,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', - 'ti_ET', 'tig_ER', 'wal_ET') + 'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW', + 'ar_SA', 'bg_BG') def test_time_locale(self): # Test %X directive loc = locale.getlocale(locale.LC_TIME)[0] diff --git a/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst new file mode 100644 index 00000000000000..ba2fae49fdc933 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER, +wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW. From eba2d4c05579fc459dd10d4998c3048060fc7983 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 27 Jun 2025 11:01:51 +0200 Subject: [PATCH 064/212] gh-136017: avoid decref in rich compare for bool objects (#136018) --- Objects/object.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 4d60128b092c22..1223983753ac46 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1131,11 +1131,14 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) res = PyObject_RichCompare(v, w, op); if (res == NULL) return -1; - if (PyBool_Check(res)) + if (PyBool_Check(res)) { ok = (res == Py_True); - else + assert(_Py_IsImmortal(res)); + } + else { ok = PyObject_IsTrue(res); - Py_DECREF(res); + Py_DECREF(res); + } return ok; } From 551f92751450c8059c3f1c9d38727e98ae29a607 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 27 Jun 2025 13:00:25 +0300 Subject: [PATCH 065/212] gh-128051: Fix tests if sys.float_repr_style is 'legacy' (#135908) Co-authored-by: Victor Stinner --- Lib/difflib.py | 4 ++-- Lib/test/test_builtin.py | 3 ++- Lib/test/test_configparser.py | 12 ++++++------ Lib/test/test_ctypes/test_parameters.py | 4 +++- Lib/test/test_enum.py | 2 +- Lib/test/test_float.py | 2 ++ Lib/test/test_format.py | 10 +++++----- Lib/test/test_fstring.py | 8 ++++---- Lib/test/test_json/test_tool.py | 2 +- Lib/test/test_optparse.py | 4 ++-- Lib/test/test_peepholer.py | 6 +++--- Lib/test/test_pprint.py | 2 +- Lib/test/test_reprlib.py | 20 ++++++++++---------- Lib/test/test_statistics.py | 3 ++- Lib/test/test_str.py | 8 ++++---- Lib/test/test_types.py | 4 ++-- 16 files changed, 50 insertions(+), 44 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 18801a9b19eb9d..487936dbf47cdc 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d221aa5e1d999f..14fe3355239615 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2991,7 +2991,8 @@ def test_namespace_order(self): def load_tests(loader, tests, pattern): from doctest import DocTestSuite - tests.addTest(DocTestSuite(builtins)) + if sys.float_repr_style == 'short': + tests.addTest(DocTestSuite(builtins)) return tests if __name__ == "__main__": diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py index 23904d17d326d8..e7364e18742c16 100644 --- a/Lib/test/test_configparser.py +++ b/Lib/test/test_configparser.py @@ -986,12 +986,12 @@ def test_add_section_default(self): def test_defaults_keyword(self): """bpo-23835 fix for ConfigParser""" - cf = self.newconfig(defaults={1: 2.4}) - self.assertEqual(cf[self.default_section]['1'], '2.4') - self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.4) - cf = self.newconfig(defaults={"A": 5.2}) - self.assertEqual(cf[self.default_section]['a'], '5.2') - self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.2) + cf = self.newconfig(defaults={1: 2.5}) + self.assertEqual(cf[self.default_section]['1'], '2.5') + self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.5) + cf = self.newconfig(defaults={"A": 5.25}) + self.assertEqual(cf[self.default_section]['a'], '5.25') + self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.25) class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_parameters.py b/Lib/test/test_ctypes/test_parameters.py index f89521cf8b3a67..46f8ff93efa915 100644 --- a/Lib/test/test_ctypes/test_parameters.py +++ b/Lib/test/test_ctypes/test_parameters.py @@ -1,3 +1,4 @@ +import sys import unittest import test.support from ctypes import (CDLL, PyDLL, ArgumentError, @@ -240,7 +241,8 @@ def test_parameter_repr(self): self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^$") self.assertEqual(repr(c_float.from_param(1.5)), "") self.assertEqual(repr(c_double.from_param(1.5)), "") - self.assertEqual(repr(c_double.from_param(1e300)), "") + if sys.float_repr_style == 'short': + self.assertEqual(repr(c_double.from_param(1e300)), "") self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^$") self.assertRegex(repr(c_char_p.from_param(b'hihi')), r"^$") self.assertRegex(repr(c_wchar_p.from_param('hihi')), r"^$") diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 221f9db7763764..bbc7630fa83f45 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -36,7 +36,7 @@ def load_tests(loader, tests, ignore): optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE, )) howto_tests = os.path.join(REPO_ROOT, 'Doc/howto/enum.rst') - if os.path.exists(howto_tests): + if os.path.exists(howto_tests) and sys.float_repr_style == 'short': tests.addTests(doctest.DocFileSuite( howto_tests, module_relative=False, diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 237d7b5d35edd7..00518abcb11b46 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -795,6 +795,8 @@ def test_format(self): self.assertRaises(ValueError, format, x, '.6,n') @support.requires_IEEE_754 + @unittest.skipUnless(sys.float_repr_style == 'short', + "applies only when using short float repr style") def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: for line in testfile: diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c7cc32e09490b2..1f626d87fa6c7a 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -346,12 +346,12 @@ def __bytes__(self): testcommon(b"%s", memoryview(b"abc"), b"abc") # %a will give the equivalent of # repr(some_obj).encode('ascii', 'backslashreplace') - testcommon(b"%a", 3.14, b"3.14") + testcommon(b"%a", 3.25, b"3.25") testcommon(b"%a", b"ghi", b"b'ghi'") testcommon(b"%a", "jkl", b"'jkl'") testcommon(b"%a", "\u0544", b"'\\u0544'") # %r is an alias for %a - testcommon(b"%r", 3.14, b"3.14") + testcommon(b"%r", 3.25, b"3.25") testcommon(b"%r", b"ghi", b"b'ghi'") testcommon(b"%r", "jkl", b"'jkl'") testcommon(b"%r", "\u0544", b"'\\u0544'") @@ -407,19 +407,19 @@ def test_non_ascii(self): self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007") self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007") - self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007") + self.assertEqual(format(12.5, "\u2007<6"), "12.5\u2007\u2007") self.assertEqual(format(0j, "\u2007<4"), "0j\u2007\u2007") self.assertEqual(format(1+2j, "\u2007<8"), "(1+2j)\u2007\u2007") self.assertEqual(format("abc", "\u2007>5"), "\u2007\u2007abc") self.assertEqual(format(123, "\u2007>5"), "\u2007\u2007123") - self.assertEqual(format(12.3, "\u2007>6"), "\u2007\u200712.3") + self.assertEqual(format(12.5, "\u2007>6"), "\u2007\u200712.5") self.assertEqual(format(1+2j, "\u2007>8"), "\u2007\u2007(1+2j)") self.assertEqual(format(0j, "\u2007>4"), "\u2007\u20070j") self.assertEqual(format("abc", "\u2007^5"), "\u2007abc\u2007") self.assertEqual(format(123, "\u2007^5"), "\u2007123\u2007") - self.assertEqual(format(12.3, "\u2007^6"), "\u200712.3\u2007") + self.assertEqual(format(12.5, "\u2007^6"), "\u200712.5\u2007") self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007") diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index f54557056784f2..58a30c8e6ac447 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1336,9 +1336,9 @@ def test_equal_equal(self): def test_conversions(self): self.assertEqual(f'{3.14:10.10}', ' 3.14') - self.assertEqual(f'{3.14!s:10.10}', '3.14 ') - self.assertEqual(f'{3.14!r:10.10}', '3.14 ') - self.assertEqual(f'{3.14!a:10.10}', '3.14 ') + self.assertEqual(f'{1.25!s:10.10}', '1.25 ') + self.assertEqual(f'{1.25!r:10.10}', '1.25 ') + self.assertEqual(f'{1.25!a:10.10}', '1.25 ') self.assertEqual(f'{"a"}', 'a') self.assertEqual(f'{"a"!r}', "'a'") @@ -1347,7 +1347,7 @@ def test_conversions(self): # Conversions can have trailing whitespace after them since it # does not provide any significance self.assertEqual(f"{3!s }", "3") - self.assertEqual(f'{3.14!s :10.10}', '3.14 ') + self.assertEqual(f'{1.25!s :10.10}', '1.25 ') # Not a conversion. self.assertEqual(f'{"a!r"}', "a!r") diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index 9ea2679c77ec17..30f9bb3331605c 100644 --- a/Lib/test/test_json/test_tool.py +++ b/Lib/test/test_json/test_tool.py @@ -270,7 +270,7 @@ def test_colors(self): (r'" \"foo\" "', f'{t.string}" \\"foo\\" "{t.reset}'), ('"α"', f'{t.string}"\\u03b1"{t.reset}'), ('123', f'{t.number}123{t.reset}'), - ('-1.2345e+23', f'{t.number}-1.2345e+23{t.reset}'), + ('-1.25e+23', f'{t.number}-1.25e+23{t.reset}'), (r'{"\\": ""}', f'''\ {ob} diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py index e6ffd2b0ffeb0e..e476e4727803e5 100644 --- a/Lib/test/test_optparse.py +++ b/Lib/test/test_optparse.py @@ -615,9 +615,9 @@ def test_float_default(self): self.parser.add_option( "-p", "--prob", help="blow up with probability PROB [default: %default]") - self.parser.set_defaults(prob=0.43) + self.parser.set_defaults(prob=0.25) expected_help = self.help_prefix + \ - " -p PROB, --prob=PROB blow up with probability PROB [default: 0.43]\n" + " -p PROB, --prob=PROB blow up with probability PROB [default: 0.25]\n" self.assertHelp(self.parser, expected_help) def test_alt_expand(self): diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index ef596630b930f7..3d7300e1480256 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -718,9 +718,9 @@ def format(fmt, *values): self.assertEqual(format('x = %d!', 1234), 'x = 1234!') self.assertEqual(format('x = %x!', 1234), 'x = 4d2!') self.assertEqual(format('x = %f!', 1234), 'x = 1234.000000!') - self.assertEqual(format('x = %s!', 1234.5678901), 'x = 1234.5678901!') - self.assertEqual(format('x = %f!', 1234.5678901), 'x = 1234.567890!') - self.assertEqual(format('x = %d!', 1234.5678901), 'x = 1234!') + self.assertEqual(format('x = %s!', 1234.0000625), 'x = 1234.0000625!') + self.assertEqual(format('x = %f!', 1234.0000625), 'x = 1234.000063!') + self.assertEqual(format('x = %d!', 1234.0000625), 'x = 1234!') self.assertEqual(format('x = %s%% %%%%', 1234), 'x = 1234% %%') self.assertEqual(format('x = %s!', '%% %s'), 'x = %% %s!') self.assertEqual(format('x = %s, y = %d', 12, 34), 'x = 12, y = 34') diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index 0c84d3d3bfd17a..41c337ade7eca1 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -458,7 +458,7 @@ def __new__(cls, celsius_degrees): return super().__new__(Temperature, celsius_degrees) def __repr__(self): kelvin_degrees = self + 273.15 - return f"{kelvin_degrees}°K" + return f"{kelvin_degrees:.2f}°K" self.assertEqual(pprint.pformat(Temperature(1000)), '1273.15°K') def test_sorted_dict(self): diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index d5631efcdb75b7..22a55b57c076eb 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -397,20 +397,20 @@ def test_valid_indent(self): 'object': { 1: 'two', b'three': [ - (4.5, 6.7), + (4.5, 6.25), [set((8, 9)), frozenset((10, 11))], ], }, 'tests': ( (dict(indent=None), '''\ - {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''), + {1: 'two', b'three': [(4.5, 6.25), [{8, 9}, frozenset({10, 11})]]}'''), (dict(indent=False), '''\ { 1: 'two', b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -430,7 +430,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -450,7 +450,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -470,7 +470,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -490,7 +490,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -518,7 +518,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -538,7 +538,7 @@ def test_valid_indent(self): -->b'three': [ -->-->( -->-->-->4.5, - -->-->-->6.7, + -->-->-->6.25, -->-->), -->-->[ -->-->-->{ @@ -558,7 +558,7 @@ def test_valid_indent(self): ....b'three': [ ........( ............4.5, - ............6.7, + ............6.25, ........), ........[ ............{ diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 0dd619dd7c8ceb..8250b0aef09aec 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -3319,7 +3319,8 @@ def tearDown(self): def load_tests(loader, tests, ignore): """Used for doctest/unittest integration.""" tests.addTests(doctest.DocTestSuite()) - tests.addTests(doctest.DocTestSuite(statistics)) + if sys.float_repr_style == 'short': + tests.addTests(doctest.DocTestSuite(statistics)) return tests diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index d6a7bd0da59910..2584fbf72d3fa6 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1231,10 +1231,10 @@ def __repr__(self): self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00') self.assertEqual('{0:<6}'.format(3), '3 ') - self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00') - self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01') - self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00') - self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ') + self.assertEqual('{0:\x00<6}'.format(3.25), '3.25\x00\x00') + self.assertEqual('{0:\x01<6}'.format(3.25), '3.25\x01\x01') + self.assertEqual('{0:\x00^6}'.format(3.25), '\x003.25\x00') + self.assertEqual('{0:^6}'.format(3.25), ' 3.25 ') self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00') self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01') diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index a117413301bebe..02592ea5eb21a1 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -517,8 +517,8 @@ def test(f, format_spec, result): # and a number after the decimal. This is tricky, because # a totally empty format specifier means something else. # So, just use a sign flag - test(1e200, '+g', '+1e+200') - test(1e200, '+', '+1e+200') + test(1.25e200, '+g', '+1.25e+200') + test(1.25e200, '+', '+1.25e+200') test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+', '+1.1e+200') From 5ee1779887ab1e27d2818f365cc3f3b4dea5dfba Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 14:35:55 +0300 Subject: [PATCH 066/212] gh-78465: Fix error message for cls.__new__(cls, ...) where cls is not instantiable (GH-135981) Previous error message suggested to use cls.__new__(), which obviously does not work. Now the error message is the same as for cls(...). --- Lib/test/support/__init__.py | 1 + Lib/test/test_sys.py | 7 +------ Lib/test/test_types.py | 5 ++--- .../2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst | 2 ++ Objects/typeobject.c | 5 +++++ 5 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 51c0ce11e8269d..fd39d3f7c95368 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2333,6 +2333,7 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds): qualname = f"{name}" msg = f"cannot create '{re.escape(qualname)}' instances" testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds) + testcase.assertRaisesRegex(TypeError, msg, tp.__new__, tp, *args, **kwds) def get_recursion_depth(): """Get the recursion depth of the caller function. diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 27524d86355b9c..486bf10a0b5647 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -869,12 +869,7 @@ def test_sys_flags(self): def assert_raise_on_new_sys_type(self, sys_attr): # Users are intentionally prevented from creating new instances of # sys.flags, sys.version_info, and sys.getwindowsversion. - arg = sys_attr - attr_type = type(sys_attr) - with self.assertRaises(TypeError): - attr_type(arg) - with self.assertRaises(TypeError): - attr_type.__new__(attr_type, arg) + support.check_disallow_instantiation(self, type(sys_attr), sys_attr) def test_sys_flags_no_instantiation(self): self.assert_raise_on_new_sys_type(sys.flags) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 02592ea5eb21a1..fc26e71ffcb67b 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2,7 +2,7 @@ from test.support import ( run_with_locale, cpython_only, no_rerun, - MISSING_C_DOCSTRINGS, EqualToForwardRef, + MISSING_C_DOCSTRINGS, EqualToForwardRef, check_disallow_instantiation, ) from test.support.script_helper import assert_python_ok from test.support.import_helper import import_fresh_module @@ -1148,8 +1148,7 @@ def test_or_type_operator_reference_cycle(self): msg='Check for union reference leak.') def test_instantiation(self): - with self.assertRaises(TypeError): - types.UnionType() + check_disallow_instantiation(self, types.UnionType) self.assertIs(int, types.UnionType[int]) self.assertIs(int, types.UnionType[int, int]) self.assertEqual(int | str, types.UnionType[int, str]) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst new file mode 100644 index 00000000000000..99734d63c5d87e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst @@ -0,0 +1,2 @@ +Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is not +instantiable builtin or extension type (with ``tp_new`` set to ``NULL``). diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b9d549610693c1..6e7471cb5941a7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10020,6 +10020,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, From a6a656ff9412cde028a9fe65ed216042fde5b0d1 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Jun 2025 19:37:44 +0800 Subject: [PATCH 067/212] gh-132732: Automatically constant evaluate pure operations (GH-132733) This adds a "macro" to the optimizer DSL called "REPLACE_OPCODE_IF_EVALUATES_PURE", which allows automatically constant evaluating a bytecode body if certain inputs have no side effects upon evaluations (such as ints, strings, and floats). Co-authored-by: Tomas R. --- Include/internal/pycore_optimizer.h | 5 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_generated_cases.py | 197 ++++++++++++ ...-04-19-16-22-47.gh-issue-132732.jgqhlF.rst | 1 + Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 7 + Python/optimizer_bytecodes.c | 77 +---- Python/optimizer_cases.c.h | 304 ++++++++++++++---- Python/optimizer_symbols.c | 39 +++ Tools/cases_generator/optimizer_generator.py | 194 ++++++++++- 10 files changed, 706 insertions(+), 122 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 576c27947824b4..8b7f12bf03d624 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" -#include "pycore_stackref.h" +#include "pycore_stackref.h" // _PyStackRef #include @@ -316,6 +316,9 @@ extern JitOptRef _Py_uop_sym_new_type( JitOptContext *ctx, PyTypeObject *typ); extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); +extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); +_PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); extern bool _Py_uop_sym_has_type(JitOptRef sym); extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 52cbc2fffe484e..ff7e800aa9bb1a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -106,7 +106,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 9e0fd1218f2534..eb01328b6ea946 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2224,5 +2224,202 @@ def test_validate_uop_unused_size_mismatch(self): "Inputs must have equal sizes"): self.run_cases_test(input, input2, output) + def test_pure_uop_body_copied_in(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + res = PyStackRef_IsNone(foo); + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + res_stackref = PyStackRef_IsNone(foo); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_deopt(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + DEOPT_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + ctx->done = true; + break; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_error_if(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + ERROR_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + goto error; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + + def test_replace_opcode_uop_body_copied_in_complex(self): + input = """ + pure op(OP, (foo -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (foo) { + res_stackref = PyStackRef_IsNone(foo); + } + else { + res_stackref = 1; + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_replace_opocode_uop_reject_array_effects(self): + input = """ + pure op(OP, (foo[2] -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo[2] -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_unknown(ctx); + } + """ + output = """ + """ + with self.assertRaisesRegex(SyntaxError, + "Pure evaluation cannot take array-like inputs"): + self.run_cases_test(input, input2, output) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst new file mode 100644 index 00000000000000..aadaf2169fd01a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst @@ -0,0 +1 @@ +Automatically constant evaluate bytecode operations marked as pure in the JIT optimizer. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 535e552e047475..1a5a9ff13a23a5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -850,7 +850,7 @@ dummy_func( DEOPT_IF(!res); } - pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 145a8c118d3612..fab6fef5ccda10 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -26,6 +26,8 @@ #include "pycore_function.h" #include "pycore_uop_ids.h" #include "pycore_range.h" +#include "pycore_unicodeobject.h" +#include "pycore_ceval.h" #include #include @@ -321,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_const _Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const +#define sym_new_const_steal _Py_uop_sym_new_const_steal +#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type @@ -350,6 +355,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define JUMP_TO_LABEL(label) goto label; + static int optimize_to_bool( _PyUOpInstruction *this_instr, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8a0484bdc2b04..3182e8b3b70144 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -181,6 +181,7 @@ dummy_func(void) { } op(_BINARY_OP, (lhs, rhs -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs); bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -235,35 +236,23 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -271,23 +260,8 @@ dummy_func(void) { } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -295,23 +269,8 @@ dummy_func(void) { } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -319,19 +278,8 @@ dummy_func(void) { } op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyUnicode_Type); } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { @@ -443,6 +391,7 @@ dummy_func(void) { } op(_UNARY_NOT, (value -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 10767ccdbd57f5..8d30df3aa7d429 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -206,6 +206,21 @@ JitOptRef value; JitOptRef res; value = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + assert(PyStackRef_BoolCheck(value)); + res_stackref = PyStackRef_IsFalse(value) + ? PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); stack_pointer[-1] = res; @@ -391,7 +406,41 @@ } case _BINARY_OP_MULTIPLY_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -400,7 +449,41 @@ } case _BINARY_OP_ADD_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -409,7 +492,41 @@ } case _BINARY_OP_SUBTRACT_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -443,29 +560,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -475,29 +605,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -507,29 +650,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -566,24 +722,39 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + PyObject *res_o = PyUnicode_Concat(left_o, right_o); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + if (res_o == NULL) { goto error; } - res = sym_new_const(ctx, temp); + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyUnicode_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -2539,6 +2710,31 @@ JitOptRef res; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, lhs) && + sym_is_safe_const(ctx, rhs) + ) { + JitOptRef lhs_sym = lhs; + JitOptRef rhs_sym = rhs; + _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); + _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); + PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); + assert(_PyEval_BinaryOps[oparg]); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + break; + } bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index c3d9e0e778bf55..e4dbca8362f4ce 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -185,6 +185,35 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref) return NULL; } +_PyStackRef +_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectBorrow(const_val); +} + +/* + Indicates whether the constant is safe to constant evaluate + (without side effects). + */ +bool +_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return false; + } + PyTypeObject *typ = Py_TYPE(const_val); + return (typ == &PyLong_Type) || + (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &PyTuple_Type) || + (typ == &PyBool_Type); +} + void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { @@ -467,6 +496,16 @@ _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val) return ref; } +JitOptRef +_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + JitOptRef res = _Py_uop_sym_new_const(ctx, const_val); + // Decref once because sym_new_const increfs it. + Py_DECREF(const_val); + return res; +} + JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx) { diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 3b4fe64b02a807..4556b6d5a74f37 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -12,6 +12,8 @@ analyze_files, StackItem, analysis_error, + CodeSection, + Label, ) from generators_common import ( DEFAULT_INPUT, @@ -19,6 +21,7 @@ write_header, Emitter, TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO @@ -75,6 +78,9 @@ def type_name(var: StackItem) -> str: return "JitOptRef *" return "JitOptRef " +def stackref_type_name(var: StackItem) -> str: + assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef." + return "_PyStackRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: variables = {"unused"} @@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None: class OptimizerEmitter(Emitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels) + self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure + self.original_uop = original_uop + self.stack = stack + def emit_save(self, storage: Storage) -> None: storage.flush(self.out) @@ -145,6 +157,185 @@ def goto_label(self, goto: Token, label: Token, storage: Storage) -> None: self.out.emit(goto) self.out.emit(label) + def replace_opcode_if_evaluates_pure( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + assert isinstance(uop, Uop) + input_identifiers = [] + for token in tkn_iter: + if token.kind == "IDENTIFIER": + input_identifiers.append(token) + if token.kind == "SEMI": + break + + if len(input_identifiers) == 0: + raise analysis_error( + "To evaluate an operation as pure, it must have at least 1 input", + tkn + ) + # Check that the input identifiers belong to the uop's + # input stack effect + uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs} + for input_tkn in input_identifiers: + if input_tkn.text not in uop_stack_effect_input_identifers: + raise analysis_error(f"{input_tkn.text} referenced in " + f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not " + f"exist in the base uop's input stack effects", + input_tkn) + input_identifiers_as_str = {tkn.text for tkn in input_identifiers} + used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] + assert len(used_stack_inputs) > 0 + emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) + emitter.emit("if (\n") + for inp in used_stack_inputs[:-1]: + emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n") + emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n") + emitter.emit(') {\n') + # Declare variables, before they are shadowed. + for inp in used_stack_inputs: + if inp.used: + emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n") + # Shadow the symbolic variables with stackrefs. + for inp in used_stack_inputs: + if inp.is_array(): + raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn) + if inp.used: + emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n") + # Rename all output variables to stackref variant. + for outp in self.original_uop.stack.outputs: + if outp.is_array(): + raise analysis_error( + "Array output StackRefs not supported for evaluating pure ops.", + self.original_uop.body.open + ) + emitter.emit(f"_PyStackRef {outp.name}_stackref;\n") + + + storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False) + # No reference management of outputs needed. + for var in storage.outputs: + var.in_local = True + emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n") + emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False) + self.out.start_line() + emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n") + # Finally, assign back the output stackrefs to symbolics. + for outp in self.original_uop.stack.outputs: + # All new stackrefs are created from new references. + # That's how the stackref contract works. + if not outp.peek: + emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") + else: + emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + storage.flush(self.out) + emitter.emit("break;\n") + emitter.emit("}\n") + return True + +class OptimizerConstantEmitter(OptimizerEmitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels, original_uop, stack) + # Replace all outputs to point to their stackref versions. + overrides = { + outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs + } + self._replacers = {**self._replacers, **overrides} + + def emit_to_with_replacement( + self, + out: CWriter, + tkn_iter: TokenIterator, + end: str, + uop: CodeSection, + storage: Storage, + inst: Instruction | None + ) -> Token: + parens = 0 + for tkn in tkn_iter: + if tkn.kind == end and parens == 0: + return tkn + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst) + else: + out.emit(tkn) + raise analysis_error(f"Expecting {end}. Reached end of file", tkn) + + def emit_stackref_override( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.emit(tkn) + self.out.emit("_stackref ") + return True + + def deopt_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.start_line() + self.out.emit("if (") + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.emit(") {\n") + next(tkn_iter) # Semi colon + # We guarantee this will deopt in real-world code + # via constants analysis. So just bail. + self.emit("ctx->done = true;\n") + self.emit("break;\n") + self.emit("}\n") + return not always_true(first_tkn) + + exit_if = deopt_if + + def error_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + next(tkn_iter) # RPAREN + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.out.emit(") {\n") + next(tkn_iter) # Semi colon + storage.clear_inputs("at ERROR_IF") + + self.out.emit("goto error;\n") + if not unconditional: + self.out.emit("}\n") + return not unconditional + + def write_uop( override: Uop | None, uop: Uop, @@ -175,13 +366,14 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n") if override: - emitter = OptimizerEmitter(out, {}) + emitter = OptimizerEmitter(out, {}, uop, stack.copy()) # No reference management of inputs needed. for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) out.start_line() storage.flush(out) + out.start_line() else: emit_default(out, uop, stack) out.start_line() From 9f48f74603c9b222488ab3ed35ae367658be778d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:23:33 +0300 Subject: [PATCH 068/212] gh-92266: Replace tabs with four spaces in Python files (#135983) --- .pre-commit-config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 822a8a9f4e5076..86410c46d1d707 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,13 @@ repos: name: Run Black on Tools/jit/ files: ^Tools/jit/ + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + - id: remove-tabs + types: [python] + exclude: ^Tools/c-analyzer/cpython/_parser.py + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: From 9a3af56cb5509ffd635748369eb806f352b3d7c4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 16:47:03 +0300 Subject: [PATCH 069/212] =?UTF-8?q?gh-136028:=20Fix=20parsing=20month=20na?= =?UTF-8?q?mes=20containing=20"=C4=B0"=20(U+0130)=20in=20strptime()=20(GH-?= =?UTF-8?q?136029)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This affects locales az_AZ, ber_DZ, ber_MA and crh_UA. --- Lib/_strptime.py | 12 ++++++++++-- Lib/test/test_strptime.py | 9 +++++++++ .../2025-06-27-13-34-28.gh-issue-136028.RY727g.rst | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 7ac6f36360cb69..cdc55e8daaffa6 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -42,6 +42,14 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') lzh_TW_alt_digits = ( # 〇:一:二:三:四:五:六:七:八:九 @@ -366,8 +374,8 @@ def __init__(self, locale_time=None): 'z': r"(?P([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index e52c46f8c58cce..0241e543cd7dde 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -337,6 +337,15 @@ def test_month_locale(self): self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + @run_with_locales('LC_TIME', 'az_AZ', 'ber_DZ', 'ber_MA', 'crh_UA') + def test_month_locale2(self): + # Test for month directives + # Month name contains 'İ' ('\u0130') + self.roundtrip('%B', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%b', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%B', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + self.roundtrip('%b', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + def test_day(self): # Test for day directives self.roundtrip('%d %Y', 2) diff --git a/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst new file mode 100644 index 00000000000000..9859df7cf6a69c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst @@ -0,0 +1,3 @@ +Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH +DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ, +ber_MA and crh_UA. From 22f7aedf3ce863a0398a1dc973d040d397dbc3b9 Mon Sep 17 00:00:00 2001 From: Will Childs-Klein Date: Fri, 27 Jun 2025 11:01:16 -0400 Subject: [PATCH 070/212] gh-135571: Guard `_hashlib` usage in `test_hashlib.py` (#135572) --- Lib/test/test_hashlib.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 7b378c45e71563..5bad483ae9dafc 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -279,7 +279,10 @@ def test_clinic_signature(self): with self.assertWarnsRegex(DeprecationWarning, DEPRECATED_STRING_PARAMETER): hashlib.new(digest_name, string=b'') - if self._hashlib: + # Make sure that _hashlib contains the constructor + # to test when using a combination of libcrypto and + # interned hash implementations. + if self._hashlib and digest_name in self._hashlib._constructors: self._hashlib.new(digest_name, b'') self._hashlib.new(digest_name, data=b'') with self.assertWarnsRegex(DeprecationWarning, @@ -333,7 +336,8 @@ def test_clinic_signature_errors(self): with self.subTest(digest_name, args=args, kwds=kwds): with self.assertRaisesRegex(TypeError, errmsg): hashlib.new(digest_name, *args, **kwds) - if self._hashlib: + if (self._hashlib and + digest_name in self._hashlib._constructors): with self.assertRaisesRegex(TypeError, errmsg): self._hashlib.new(digest_name, *args, **kwds) From 9597a3cd9d6119a6b014cb064ec1e15c4ae38469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:12:21 +0200 Subject: [PATCH 071/212] gh-135755: rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros (#135847) Rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros to `_Py_HACL_CAN_COMPILE_VEC{128,256}`. These macros are private. --- Modules/blake2module.c | 66 +++++++++++++++++++------------------- Modules/hmacmodule.c | 21 ++++++------ PCbuild/pythoncore.vcxproj | 8 +++-- configure | 4 +-- configure.ac | 6 ++-- pyconfig.h.in | 12 +++---- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 9e279e11b518d2..163f238a4268d0 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,25 +43,25 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2s.h" #include "_hacl/Hacl_Hash_Blake2b.h" -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif @@ -88,7 +88,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State * blake2_get_state_from_type(PyTypeObject *module) { @@ -181,7 +181,7 @@ blake2module_init_cpu_features(Blake2State *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -191,7 +191,7 @@ blake2module_init_cpu_features(Blake2State *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else @@ -332,18 +332,18 @@ is_blake2s(blake2_impl impl) static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State *st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 return st->can_run_simd256 ? Blake2b_256 : Blake2b; #else return Blake2b; #endif } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 return st->can_run_simd128 ? Blake2s_128 : Blake2s; #else return Blake2s; @@ -357,10 +357,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -429,13 +429,13 @@ blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since // otherwise this results in an unresolved symbol at link-time. -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update, self->blake2b_256_state, buf, len); return; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update, self->blake2s_128_state, buf, len); @@ -555,12 +555,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -623,12 +623,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_MALLOC(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_MALLOC(Blake2s_Simd128, self->blake2s_128_state); break; @@ -756,12 +756,12 @@ blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_COPY(Blake2b_Simd256, blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_COPY(Blake2s_Simd128, blake2s_128_state); break; @@ -840,12 +840,12 @@ static uint8_t blake2_blake2b_compute_digest(Blake2Object *self, uint8_t *digest) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_digest( self->blake2b_256_state, digest); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_digest( self->blake2s_128_state, digest); @@ -923,11 +923,11 @@ static Hacl_Hash_Blake2b_index hacl_get_blake2_info(Blake2Object *self) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state); #endif @@ -975,12 +975,12 @@ py_blake2_clear(PyObject *op) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_FREE(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_FREE(Blake2s_Simd128, self->blake2s_128_state); break; diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index e7a5ccbb19b45c..95e400231bb65c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -361,7 +362,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -369,7 +370,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -1601,7 +1602,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1611,7 +1612,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 32a8f2dbad3d5e..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -419,8 +419,12 @@ - HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) - HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + + + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + diff --git a/configure b/configure index 43b36d9231e341..75ae1699a8e451 100755 --- a/configure +++ b/configure @@ -32633,7 +32633,7 @@ then : LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h # macOS universal2 builds *support* the -msse etc flags because they're @@ -32709,7 +32709,7 @@ then : LIBHACL_SIMD256_FLAGS="-mavx2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h # macOS universal2 builds *support* the -mavx2 compiler flag because it's diff --git a/configure.ac b/configure.ac index e77696e3a4e025..4da1ba78b54b0d 100644 --- a/configure.ac +++ b/configure.ac @@ -8026,7 +8026,8 @@ then AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ + HACL* library can compile SIMD128 implementations]) # macOS universal2 builds *support* the -msse etc flags because they're # available on x86_64. However, performance of the HACL SIMD128 implementation @@ -8057,7 +8058,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ + HACL* library can compile SIMD256 implementations]) # macOS universal2 builds *support* the -mavx2 compiler flag because it's # available on x86_64; but the HACL SIMD256 build then fails because the diff --git a/pyconfig.h.in b/pyconfig.h.in index d4f1da7fb10776..d7c496fccc682c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -50,12 +50,6 @@ /* Define if getpgrp() must be called as getpgrp(0). */ #undef GETPGRP_HAVE_ARG -/* HACL* library can compile SIMD128 implementations */ -#undef HACL_CAN_COMPILE_SIMD128 - -/* HACL* library can compile SIMD256 implementations */ -#undef HACL_CAN_COMPILE_SIMD256 - /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -2026,6 +2020,12 @@ /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX +/* HACL* library can compile SIMD128 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC128 + +/* HACL* library can compile SIMD256 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC256 + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From dd67d75367aa413cead18a9a69cbd4b1c9ba16f4 Mon Sep 17 00:00:00 2001 From: Lee Dogeon Date: Sat, 28 Jun 2025 00:15:11 +0900 Subject: [PATCH 072/212] gh-108765: fix comment about macro definitions in `_stat.c` post GH-108854 (#136027) --- Modules/_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_stat.c b/Modules/_stat.c index f11ca7d23b440d..1dabf2f6d5b07f 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -57,7 +57,7 @@ typedef unsigned short mode_t; * Only the names are defined by POSIX but not their value. All common file * types seems to have the same numeric value on all platforms, though. * - * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK + * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK */ #ifndef S_IFBLK @@ -86,7 +86,7 @@ typedef unsigned short mode_t; /* S_ISXXX() - * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR() + * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR() */ #ifndef S_ISBLK From d048a8ce348ee9f71316e2487cf5bb936753dba5 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 27 Jun 2025 08:20:51 -0700 Subject: [PATCH 073/212] GH-135904: Optimize the JIT's assembly control flow (GH-135905) --- ...-06-24-16-46-34.gh-issue-135904.78xfon.rst | 2 + Tools/jit/_optimizers.py | 319 ++++++++++++++++++ Tools/jit/_stencils.py | 67 +--- Tools/jit/_targets.py | 58 ++-- 4 files changed, 352 insertions(+), 94 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst create mode 100644 Tools/jit/_optimizers.py diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst new file mode 100644 index 00000000000000..ecbd8fda9a5e9d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst @@ -0,0 +1,2 @@ +Perform more aggressive control-flow optimizations on the machine code +templates emitted by the experimental JIT compiler. diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py new file mode 100644 index 00000000000000..1077e4106fdfbd --- /dev/null +++ b/Tools/jit/_optimizers.py @@ -0,0 +1,319 @@ +"""Low-level optimization of textual assembly.""" + +import dataclasses +import pathlib +import re +import typing + +# Same as saying "not string.startswith('')": +_RE_NEVER_MATCH = re.compile(r"(?!)") +# Dictionary mapping branch instructions to their inverted branch instructions. +# If a branch cannot be inverted, the value is None: +_X86_BRANCHES = { + # https://www.felixcloutier.com/x86/jcc + "ja": "jna", + "jae": "jnae", + "jb": "jnb", + "jbe": "jnbe", + "jc": "jnc", + "jcxz": None, + "je": "jne", + "jecxz": None, + "jg": "jng", + "jge": "jnge", + "jl": "jnl", + "jle": "jnle", + "jo": "jno", + "jp": "jnp", + "jpe": "jpo", + "jrcxz": None, + "js": "jns", + "jz": "jnz", + # https://www.felixcloutier.com/x86/loop:loopcc + "loop": None, + "loope": None, + "loopne": None, + "loopnz": None, + "loopz": None, +} +# Update with all of the inverted branches, too: +_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} + + +@dataclasses.dataclass +class _Block: + label: str | None = None + # Non-instruction lines like labels, directives, and comments: + noninstructions: list[str] = dataclasses.field(default_factory=list) + # Instruction lines: + instructions: list[str] = dataclasses.field(default_factory=list) + # If this block ends in a jump, where to? + target: typing.Self | None = None + # The next block in the linked list: + link: typing.Self | None = None + # Whether control flow can fall through to the linked block above: + fallthrough: bool = True + # Whether this block can eventually reach the next uop (_JIT_CONTINUE): + hot: bool = False + + def resolve(self) -> typing.Self: + """Find the first non-empty block reachable from this one.""" + block = self + while block.link and not block.instructions: + block = block.link + return block + + +@dataclasses.dataclass +class Optimizer: + """Several passes of analysis and optimization for textual assembly.""" + + path: pathlib.Path + _: dataclasses.KW_ONLY + # prefix used to mangle symbols on some platforms: + prefix: str = "" + # The first block in the linked list: + _root: _Block = dataclasses.field(init=False, default_factory=_Block) + _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) + # No groups: + _re_noninstructions: typing.ClassVar[re.Pattern[str]] = re.compile( + r"\s*(?:\.|#|//|$)" + ) + # One group (label): + _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( + r'\s*(?P