From df1ed8439bb14435ee944b2a35001e29436abb48 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Thu, 24 Jul 2025 16:45:15 +0300 Subject: [PATCH 1/3] [mypyc] Make type objects immortal if using free threading If they are not immortal, concurrent construction of objects by multiple threads can cause serious contention due to reference count updates. This is similar to how user-defined normal Python classes in free-threaded builds are immortal. This speeds up a micro-benchmark that constructs instances of a native class in multiple threads by a big factor (5x+). --- mypyc/irbuild/classdef.py | 8 +++++++- mypyc/lib-rt/CPy.h | 4 ++++ mypyc/lib-rt/misc_ops.c | 12 +++++++++++- mypyc/lib-rt/mypyc_util.h | 3 ++- mypyc/primitives/misc_ops.py | 15 +++++++++++++++ 5 files changed, 39 insertions(+), 3 deletions(-) diff --git a/mypyc/irbuild/classdef.py b/mypyc/irbuild/classdef.py index 6b59750c7dec..3c7b11a302f6 100644 --- a/mypyc/irbuild/classdef.py +++ b/mypyc/irbuild/classdef.py @@ -4,6 +4,7 @@ from abc import abstractmethod from typing import Callable, Final +import sys from mypy.nodes import ( EXCLUDED_ENUM_ATTRIBUTES, @@ -28,7 +29,7 @@ is_class_var, ) from mypy.types import Instance, UnboundType, get_proper_type -from mypyc.common import PROPSET_PREFIX +from mypyc.common import PROPSET_PREFIX, IS_FREE_THREADED from mypyc.ir.class_ir import ClassIR, NonExtClassInfo from mypyc.ir.func_ir import FuncDecl, FuncSignature from mypyc.ir.ops import ( @@ -81,6 +82,7 @@ py_calc_meta_op, pytype_from_template_op, type_object_op, + set_immortal_op, ) from mypyc.subtype import is_subtype @@ -449,6 +451,10 @@ def allocate_class(builder: IRBuilder, cdef: ClassDef) -> Value: ) # Create the class tp = builder.call_c(pytype_from_template_op, [template, tp_bases, modname], cdef.line) + if IS_FREE_THREADED and sys.version_info >= (3, 14): + # Set type object to be immortal, as otherwise reference count contention + # can cause a massive performance hit in the worst case. 
+ builder.call_c(set_immortal_op, [tp], cdef.line) # Immediately fix up the trait vtables, before doing anything with the class. ir = builder.mapper.type_to_ir[cdef.info] if not ir.is_trait and not ir.builtin_base: diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index e7a7f9a07626..1881aa97f308 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -931,6 +931,10 @@ PyObject *CPy_GetANext(PyObject *aiter); void CPy_SetTypeAliasTypeComputeFunction(PyObject *alias, PyObject *compute_value); void CPyTrace_LogEvent(const char *location, const char *line, const char *op, const char *details); +#if CPY_3_14_FEATURES +void CPy_SetImmortal(PyObject *obj); +#endif + #ifdef __cplusplus } #endif diff --git a/mypyc/lib-rt/misc_ops.c b/mypyc/lib-rt/misc_ops.c index 8aa25cc11e02..3787ea553037 100644 --- a/mypyc/lib-rt/misc_ops.c +++ b/mypyc/lib-rt/misc_ops.c @@ -1058,7 +1058,7 @@ void CPyTrace_LogEvent(const char *location, const char *line, const char *op, c #endif -#ifdef CPY_3_12_FEATURES +#if CPY_3_12_FEATURES // Copied from Python 3.12.3, since this struct is internal to CPython. It defines // the structure of typing.TypeAliasType objects. We need it since compute_value is @@ -1088,3 +1088,13 @@ void CPy_SetTypeAliasTypeComputeFunction(PyObject *alias, PyObject *compute_valu } #endif + +#if CPY_3_14_FEATURES + +#include "internal/pycore_object.h" + +void CPy_SetImmortal(PyObject *obj) { + _Py_SetImmortal(obj); +} + +#endif diff --git a/mypyc/lib-rt/mypyc_util.h b/mypyc/lib-rt/mypyc_util.h index 3d4eba3a3cdb..f200d4f90def 100644 --- a/mypyc/lib-rt/mypyc_util.h +++ b/mypyc/lib-rt/mypyc_util.h @@ -139,8 +139,9 @@ static inline CPyTagged CPyTagged_ShortFromSsize_t(Py_ssize_t x) { return x << 1; } -// Are we targeting Python 3.12 or newer? +// Are we targeting Python 3.X or newer? 
#define CPY_3_12_FEATURES (PY_VERSION_HEX >= 0x030c0000) +#define CPY_3_14_FEATURES (PY_VERSION_HEX >= 0x030e0000) #if CPY_3_12_FEATURES diff --git a/mypyc/primitives/misc_ops.py b/mypyc/primitives/misc_ops.py index e2a1aea1a8d6..e3d59f53ed76 100644 --- a/mypyc/primitives/misc_ops.py +++ b/mypyc/primitives/misc_ops.py @@ -311,3 +311,18 @@ return_type=void_rtype, error_kind=ERR_NEVER, ) + +# Mark object as immortal -- it won't be freed via reference counting, as +# the reference count won't be updated any longer. Immortal objects support +# fast concurrent read-only access from multiple threads when using free +# threading, since this eliminates contention from concurrent reference count +# updates. +# +# Needs at least Python 3.14. +set_immortal_op = custom_primitive_op( + name="set_immmortal", + c_function_name="CPy_SetImmortal", + arg_types=[object_rprimitive], + return_type=void_rtype, + error_kind=ERR_NEVER, +) From 18f1d04c94827403ffb87cca3c70ea4b33d60cfc Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 30 Jul 2025 11:37:12 +0300 Subject: [PATCH 2/3] Also support non-native classes --- mypyc/irbuild/builder.py | 8 ++++++++ mypyc/irbuild/classdef.py | 16 +++++++++------- mypyc/irbuild/ll_builder.py | 8 ++++++++ 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/mypyc/irbuild/builder.py b/mypyc/irbuild/builder.py index 7e63d482c786..ec3c1b1b1f3c 100644 --- a/mypyc/irbuild/builder.py +++ b/mypyc/irbuild/builder.py @@ -424,6 +424,10 @@ def new_tuple(self, items: list[Value], line: int) -> Value: def debug_print(self, toprint: str | Value) -> None: return self.builder.debug_print(toprint) + def set_immortal_if_free_threaded(self, v: Value, line: int) -> None: + """Make an object immortal on free-threaded builds (to avoid contention).""" + self.builder.set_immortal_if_free_threaded(v, line) + # Helpers for IR building def add_to_non_ext_dict( @@ -433,6 +437,10 @@ def add_to_non_ext_dict( key_unicode = self.load_str(key) 
self.primitive_op(dict_set_item_op, [non_ext.dict, key_unicode, val], line) + # It's important that accessing class dictionary items from multiple threads + # doesn't cause contention. + self.builder.set_immortal_if_free_threaded(val, line) + def gen_import(self, id: str, line: int) -> None: self.imports[id] = None diff --git a/mypyc/irbuild/classdef.py b/mypyc/irbuild/classdef.py index 3c7b11a302f6..3282e836ac9e 100644 --- a/mypyc/irbuild/classdef.py +++ b/mypyc/irbuild/classdef.py @@ -4,7 +4,6 @@ from abc import abstractmethod from typing import Callable, Final -import sys from mypy.nodes import ( EXCLUDED_ENUM_ATTRIBUTES, @@ -29,7 +28,7 @@ is_class_var, ) from mypy.types import Instance, UnboundType, get_proper_type -from mypyc.common import PROPSET_PREFIX, IS_FREE_THREADED +from mypyc.common import PROPSET_PREFIX from mypyc.ir.class_ir import ClassIR, NonExtClassInfo from mypyc.ir.func_ir import FuncDecl, FuncSignature from mypyc.ir.ops import ( @@ -82,7 +81,6 @@ py_calc_meta_op, pytype_from_template_op, type_object_op, - set_immortal_op, ) from mypyc.subtype import is_subtype @@ -264,6 +262,9 @@ def finalize(self, ir: ClassIR) -> None: non_ext_class = load_non_ext_class(self.builder, ir, self.non_ext, self.cdef.line) non_ext_class = load_decorated_class(self.builder, self.cdef, non_ext_class) + # Try to avoid contention when using free threading. + self.builder.set_immortal_if_free_threaded(non_ext_class, self.cdef.line) + # Save the decorated class self.builder.add( InitStatic(non_ext_class, self.cdef.name, self.builder.module_name, NAMESPACE_TYPE) @@ -451,10 +452,11 @@ def allocate_class(builder: IRBuilder, cdef: ClassDef) -> Value: ) # Create the class tp = builder.call_c(pytype_from_template_op, [template, tp_bases, modname], cdef.line) - if IS_FREE_THREADED and sys.version_info >= (3, 14): - # Set type object to be immortal, as otherwise reference count contention - # can cause a massive performance hit in the worst case. 
- builder.call_c(set_immortal_op, [tp], cdef.line) + + # Set type object to be immortal if free threaded, as otherwise reference count contention + # can cause a big performance hit. + builder.set_immortal_if_free_threaded(tp, cdef.line) + # Immediately fix up the trait vtables, before doing anything with the class. ir = builder.mapper.type_to_ir[cdef.info] if not ir.is_trait and not ir.builtin_base: diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index 79ad4cc62822..386f449e8c8a 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -6,6 +6,7 @@ from __future__ import annotations +import sys from collections.abc import Sequence from typing import Callable, Final, Optional @@ -16,6 +17,7 @@ from mypyc.common import ( BITMAP_BITS, FAST_ISINSTANCE_MAX_SUBCLASSES, + IS_FREE_THREADED, MAX_LITERAL_SHORT_INT, MAX_SHORT_INT, MIN_LITERAL_SHORT_INT, @@ -164,6 +166,7 @@ fast_isinstance_op, none_object_op, not_implemented_op, + set_immortal_op, var_object_size, ) from mypyc.primitives.registry import ( @@ -2322,6 +2325,11 @@ def new_tuple_with_length(self, length: Value, line: int) -> Value: def int_to_float(self, n: Value, line: int) -> Value: return self.primitive_op(int_to_float_op, [n], line) + def set_immortal_if_free_threaded(self, v: Value, line: int) -> None: + """Make an object immortal on free-threaded builds (to avoid contention).""" + if IS_FREE_THREADED and sys.version_info >= (3, 14): + self.call_c(set_immortal_op, [v], line) + # Internal helpers def decompose_union_helper( From ad9a3d7d50002bca1167dd0221ebd1735e2f7495 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Wed, 30 Jul 2025 14:41:26 +0300 Subject: [PATCH 3/3] Fix type check error --- mypyc/irbuild/ll_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index 386f449e8c8a..a5e28268efed 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -2328,7 
+2328,7 @@ def int_to_float(self, n: Value, line: int) -> Value: def set_immortal_if_free_threaded(self, v: Value, line: int) -> None: """Make an object immortal on free-threaded builds (to avoid contention).""" if IS_FREE_THREADED and sys.version_info >= (3, 14): - self.call_c(set_immortal_op, [v], line) + self.primitive_op(set_immortal_op, [v], line) # Internal helpers