From 4d2a9d760f89d660378ef28c41751d4ecb683b33 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 3 Jun 2025 14:06:16 +0100 Subject: [PATCH 01/15] WIP cache freed generator object instance to speed up allocation --- mypyc/codegen/emit.py | 25 +++++++++++++++++++++++ mypyc/codegen/emitclass.py | 40 ++++++++++++++++++++++++++++++++++++- mypyc/codegen/emitmodule.py | 4 +++- mypyc/ir/class_ir.py | 5 +++++ mypyc/irbuild/generator.py | 1 + 5 files changed, 73 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index d7d7d9c7abda..ba8b8307e1fd 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -1115,6 +1115,31 @@ def emit_gc_clear(self, target: str, rtype: RType) -> None: else: assert False, "emit_gc_clear() not implemented for %s" % repr(rtype) + def emit_reuse_clear(self, target: str, rtype: RType) -> None: + """Emit attribute clear before object is added into freelist. + + Assume that 'target' represents a C expression that refers to a + struct member, such as 'self->x'. + + Unlike emit_gc_clear(), initialize attribute value to match a freshly + allocated object. + """ + if isinstance(rtype, RTuple): + for i, item_type in enumerate(rtype.types): + self.emit_reuse_clear(f"{target}.f{i}", item_type) + elif not rtype.is_refcounted: + self.emit_line(f"{target} = {rtype.c_undefined};") + elif isinstance(rtype, RPrimitive) and rtype.name == "builtins.int": + self.emit_line(f"if (CPyTagged_CheckLong({target})) {{") + self.emit_line(f"CPyTagged __tmp = {target};") + self.emit_line(f"{target} = {self.c_undefined_value(rtype)};") + self.emit_line("Py_XDECREF(CPyTagged_LongAsObject(__tmp));") + self.emit_line("} else {") + self.emit_line(f"{target} = {self.c_undefined_value(rtype)};") + self.emit_line("}") + else: + self.emit_gc_clear(target, rtype) + def emit_traceback( self, source_path: str, module_name: str, traceback_entry: tuple[str, int] ) -> None: diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index da3d14f9dafe..b5c77b174e51 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -186,6 +186,16 @@ def generate_class_type_decl( ) +def generate_class_reuse( + cl: ClassIR, c_emitter: Emitter, external_emitter: Emitter, emitter: Emitter +) -> None: + assert cl.reuse_freed_instance + context = c_emitter.context + name = cl.name_prefix(c_emitter.names) + "_free_instance" + struct_name = cl.struct_name(c_emitter.names) + context.declarations[name] = HeaderDeclaration(f"{struct_name} *{name};", needs_export=True) + + def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None: """Generate C code for a class. @@ -557,7 +567,17 @@ def generate_setup_for_class( emitter.emit_line("static PyObject *") emitter.emit_line(f"{func_name}(PyTypeObject *type)") emitter.emit_line("{") - emitter.emit_line(f"{cl.struct_name(emitter.names)} *self;") + struct_name = cl.struct_name(emitter.names) + emitter.emit_line(f"{struct_name} *self;") + + prefix = cl.name_prefix(emitter.names) + if cl.reuse_freed_instance: + emitter.emit_line(f"if ({prefix}_free_instance != NULL) {{") + emitter.emit_line(f"self = {prefix}_free_instance;") + emitter.emit_line(f"{prefix}_free_instance = NULL;") + emitter.emit_line("return (PyObject *)self;") + emitter.emit_line("}") + emitter.emit_line(f"self = ({cl.struct_name(emitter.names)} *)type->tp_alloc(type, 0);") emitter.emit_line("if (self == NULL)") emitter.emit_line(" return NULL;") @@ -786,6 +806,8 @@ def generate_dealloc_for_class( emitter.emit_line("if (!PyObject_GC_IsFinalized((PyObject *)self)) {") emitter.emit_line("Py_TYPE(self)->tp_finalize((PyObject *)self);") emitter.emit_line("}") + if cl.reuse_freed_instance: + emit_reuse_dealloc(cl, emitter) emitter.emit_line("PyObject_GC_UnTrack(self);") # The trashcan is needed to handle deep recursive deallocations emitter.emit_line(f"CPy_TRASHCAN_BEGIN(self, {dealloc_func_name})") @@ -795,6 +817,22 @@ def generate_dealloc_for_class( emitter.emit_line("}") +def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: + prefix = cl.name_prefix(emitter.names) + emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") + emitter.emit_line(f"{prefix}_free_instance = self;") + emitter.emit_line("Py_INCREF(self);") + + # TODO: emit_clear_bitmaps(cl, emitter) + + for base in reversed(cl.base_mro): + for attr, rtype in base.attributes.items(): + emitter.emit_reuse_clear(f"self->{emitter.attr(attr)}", rtype) + + emitter.emit_line("return;") + emitter.emit_line("}") + + def generate_finalize_for_class( del_method: FuncIR, finalize_func_name: str, emitter: Emitter ) -> None: diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index f914bfd6345d..e1b6a7857294 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -29,7 +29,7 @@ from mypy.util import hash_digest, json_dumps from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.emit import Emitter, EmitterContext, HeaderDeclaration, c_array_initializer -from mypyc.codegen.emitclass import generate_class, generate_class_type_decl +from mypyc.codegen.emitclass import generate_class, generate_class_reuse, generate_class_type_decl from mypyc.codegen.emitfunc import generate_native_function, native_function_header from mypyc.codegen.emitwrapper import ( generate_legacy_wrapper_function, @@ -609,6 +609,8 @@ def generate_c_for_modules(self) -> list[tuple[str, str]]: self.declare_finals(module_name, module.final_names, declarations) for cl in module.classes: generate_class_type_decl(cl, emitter, ext_declarations, declarations) + if cl.reuse_freed_instance: + generate_class_reuse(cl, emitter, ext_declarations, declarations) self.declare_type_vars(module_name, module.type_var_names, declarations) for fn in module.functions: generate_function_declaration(fn, declarations) diff --git a/mypyc/ir/class_ir.py b/mypyc/ir/class_ir.py index c88b9b0c7afc..0ecf909a18f9 100644 --- a/mypyc/ir/class_ir.py +++ b/mypyc/ir/class_ir.py @@ -204,6 +204,11 @@ def __init__( # If this is a generator environment class, what is the actual method for it self.env_user_function: FuncIR | None = None + # If True, keep one freed, cleared instance available for immediate reuse to + # speed up allocations. This helps if many objects are freed quickly, before + # other instances of the same class are allocated. + self.reuse_freed_instance = False + def __repr__(self) -> str: return ( "ClassIR(" diff --git a/mypyc/irbuild/generator.py b/mypyc/irbuild/generator.py index 782cb4319757..0e4b0e3e184a 100644 --- a/mypyc/irbuild/generator.py +++ b/mypyc/irbuild/generator.py @@ -156,6 +156,7 @@ def setup_generator_class(builder: IRBuilder) -> ClassIR: name = f"{builder.fn_info.namespaced_name()}_gen" generator_class_ir = ClassIR(name, builder.module_name, is_generated=True, is_final_class=True) + generator_class_ir.reuse_freed_instance = True if builder.fn_info.can_merge_generator_and_env_classes(): builder.fn_info.env_class = generator_class_ir else: From 39d50d36a335c4bc4bb86d3981d8622bb5971dea Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Tue, 3 Jun 2025 14:09:34 +0100 Subject: [PATCH 02/15] Fix serialization --- mypyc/ir/class_ir.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mypyc/ir/class_ir.py b/mypyc/ir/class_ir.py index 0ecf909a18f9..f813294235bc 100644 --- a/mypyc/ir/class_ir.py +++ b/mypyc/ir/class_ir.py @@ -408,6 +408,7 @@ def serialize(self) -> JsonDict: "_sometimes_initialized_attrs": sorted(self._sometimes_initialized_attrs), "init_self_leak": self.init_self_leak, "env_user_function": self.env_user_function.id if self.env_user_function else None, + "reuse_freed_instance": self.reuse_freed_instance, } @classmethod @@ -463,6 +464,7 @@ def deserialize(cls, data: JsonDict, ctx: DeserMaps) -> ClassIR: ir.env_user_function = ( ctx.functions[data["env_user_function"]] if data["env_user_function"] else None ) + ir.reuse_freed_instance = data["reuse_freed_instance"] return ir From 5b6dda6350b239df212a7330c4bfe84a8c140bef Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Mon, 9 Jun 2025 13:30:38 +0100 Subject: [PATCH 03/15] WIP try better approach --- mypyc/codegen/emitclass.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index b5c77b174e51..32eb05a8a31a 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -575,6 +575,8 @@ def generate_setup_for_class( emitter.emit_line(f"if ({prefix}_free_instance != NULL) {{") emitter.emit_line(f"self = {prefix}_free_instance;") emitter.emit_line(f"{prefix}_free_instance = NULL;") + emitter.emit_line("Py_SET_REFCNT(self, 1);") + emitter.emit_line("PyObject_GC_Track(self);") emitter.emit_line("return (PyObject *)self;") emitter.emit_line("}") @@ -806,9 +808,9 @@ def generate_dealloc_for_class( emitter.emit_line("if (!PyObject_GC_IsFinalized((PyObject *)self)) {") emitter.emit_line("Py_TYPE(self)->tp_finalize((PyObject *)self);") emitter.emit_line("}") + emitter.emit_line("PyObject_GC_UnTrack(self);") if cl.reuse_freed_instance: emit_reuse_dealloc(cl, emitter) - emitter.emit_line("PyObject_GC_UnTrack(self);") # The trashcan is needed to handle deep recursive deallocations emitter.emit_line(f"CPy_TRASHCAN_BEGIN(self, {dealloc_func_name})") emitter.emit_line(f"{clear_func_name}(self);") @@ -821,7 +823,6 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: prefix = cl.name_prefix(emitter.names) emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") emitter.emit_line(f"{prefix}_free_instance = self;") - emitter.emit_line("Py_INCREF(self);") # TODO: emit_clear_bitmaps(cl, emitter) From fb5520ab973c2fa71494320a5075109b2fee45d5 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Thu, 19 Jun 2025 17:29:11 +0100 Subject: [PATCH 04/15] Add tests --- mypyc/test-data/run-generators.test | 45 ++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/mypyc/test-data/run-generators.test b/mypyc/test-data/run-generators.test index 2e55ded76f74..b9e14a73f3dc 100644 --- a/mypyc/test-data/run-generators.test +++ b/mypyc/test-data/run-generators.test @@ -681,7 +681,6 @@ def test_basic() -> None: assert context.x == 1 assert context.x == 0 - [case testYieldSpill] from typing import Generator from testutil import run_generator @@ -697,3 +696,47 @@ def test_basic() -> None: yields, val = x assert yields == ('foo',) assert val == 3, val + +[case testGeneratorReuse] +from typing import Iterator + +def gen(x: list[int]) -> Iterator[list[int]]: + y = [9] + for z in x: + yield y + [z] + yield y + +def gen_range(n: int) -> Iterator[int]: + for x in range(n): + yield x + +def test_use_generator_multiple_times_one_at_a_time() -> None: + for i in range(100): + a = [] + for x in gen([2, i]): + a.append(x) + assert a == [[9, 2], [9, i], [9]] + +def test_use_multiple_generator_instances_at_same_time() -> None: + a = [] + for x in gen([2]): + a.append(x) + for y in gen([3, 4]): + a.append(y) + assert a == [[9, 2], [9, 3], [9, 4], [9], [9], [9, 3], [9, 4], [9]] + +def test_use_multiple_generator_instances_at_same_time_2() -> None: + a = [] + for x in gen_range(2): + a.append(x) + b = [] + for y in gen_range(3): + b.append(y) + c = [] + for z in gen_range(4): + c.append(z) + assert c == [0, 1, 2, 3] + assert b == [0, 1, 2] + assert a == [0, 1] + assert list(gen_range(5)) == list(range(5)) + From 7c11a7ac1af1ae92e524e85dce00d07604b88989 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Thu, 19 Jun 2025 18:40:35 +0100 Subject: [PATCH 05/15] [mypyc] Foundations for supporting thread-local C variables Use e.g. `CPyThreadLocal int x;` to define a thread-local variable that should work across most compilers we might want to support. --- mypyc/lib-rt/mypyc_util.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/mypyc/lib-rt/mypyc_util.h b/mypyc/lib-rt/mypyc_util.h index 27a11ab9f581..01154824c8f1 100644 --- a/mypyc/lib-rt/mypyc_util.h +++ b/mypyc/lib-rt/mypyc_util.h @@ -23,6 +23,31 @@ #define CPy_NOINLINE #endif +#ifndef Py_GIL_DISABLED + +// Everything is running in the same thread, so no need for thread locals +#define CPyThreadLocal + +#else + +// 1. Use C11 standard thread_local storage, if available +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) +#define CPyThreadLocal _Thread_local + +// 2. Microsoft Visual Studio fallback +#elif defined(_MSC_VER) +#define CPyThreadLocal __declspec(thread) + +// 3. GNU thread local storage for GCC/Clang targets that still need it +#elif defined(__GNUC__) || defined(__clang__) +#define CPyThreadLocal __thread + +#else +#error "Cannot define CPyThreadLocal for this compiler/target" +#endif + +#endif // Py_GIL_DISABLED + // INCREF and DECREF that assert the pointer is not NULL. // asserts are disabled in release builds so there shouldn't be a perf hit. // I'm honestly kind of surprised that this isn't done by default. From 5c89db777214b8e382733e99fc33356a65f9165f Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Thu, 19 Jun 2025 18:57:09 +0100 Subject: [PATCH 06/15] Mark as thread local --- mypyc/codegen/emitclass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 32eb05a8a31a..99e935ccb7b8 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -193,7 +193,7 @@ def generate_class_reuse( context = c_emitter.context name = cl.name_prefix(c_emitter.names) + "_free_instance" struct_name = cl.struct_name(c_emitter.names) - context.declarations[name] = HeaderDeclaration(f"{struct_name} *{name};", needs_export=True) + context.declarations[name] = HeaderDeclaration(f"CPyThreadLocal {struct_name} *{name};", needs_export=True) def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None: From 82987bb567a3f17cf7177bc789eb2158d8b48cdb Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 10:11:10 +0100 Subject: [PATCH 07/15] Add identity based test --- mypyc/test-data/run-generators.test | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/mypyc/test-data/run-generators.test b/mypyc/test-data/run-generators.test index b9e14a73f3dc..bb60f7fdc881 100644 --- a/mypyc/test-data/run-generators.test +++ b/mypyc/test-data/run-generators.test @@ -698,7 +698,7 @@ def test_basic() -> None: assert val == 3, val [case testGeneratorReuse] -from typing import Iterator +from typing import Iterator, Any def gen(x: list[int]) -> Iterator[list[int]]: y = [9] @@ -740,3 +740,29 @@ def test_use_multiple_generator_instances_at_same_time_2() -> None: assert a == [0, 1] assert list(gen_range(5)) == list(range(5)) +def gen_a(x: int) -> Iterator[int]: + yield x + 1 + +def gen_b(x: int) -> Iterator[int]: + yield x + 2 + +def test_generator_identities() -> None: + # Sanity check: two distinct live objects can't reuse the same memory location + g1 = gen_a(1) + g2 = gen_a(1) + assert g1 is not g2 + + # If two generators have non-overlapping lifetimes, they should reuse a memory location + g3 = gen_b(1) + id1 = id(g3) + g3 = gen_b(1) + assert id(g3) == id1 + + # More complex case of reuse: allocate other objects in between + g4: Any = gen_a(1) + id2 = id(g4) + g4 = gen_b(1) + g4 = [gen_b(n) for n in range(100)] + g4 = gen_a(1) + assert id(g4) == id2 + From 13f2b33cef15b2ad93f6764dc586bc16c191d928 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 10:22:42 +0100 Subject: [PATCH 08/15] Add multithreaded test --- mypyc/test-data/run-generators.test | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mypyc/test-data/run-generators.test b/mypyc/test-data/run-generators.test index bb60f7fdc881..b3077b1149a8 100644 --- a/mypyc/test-data/run-generators.test +++ b/mypyc/test-data/run-generators.test @@ -766,3 +766,35 @@ def test_generator_identities() -> None: g4 = gen_a(1) assert id(g4) == id2 +[case testGeneratorReuseWithGilDisabled] +import sys +import threading +from typing import Iterator + +def gen() -> Iterator[int]: + yield 1 + +def is_gil_disabled() -> bool: + return hasattr(sys, "_is_gil_enabled") and not sys._is_gil_enabled() + +def test_each_thread_gets_separate_instance() -> None: + if not is_gil_disabled(): + # This only makes sense if GIL is disabled + return + + g = gen() + id1 = id(g) + + id2 = 0 + + def run() -> None: + nonlocal id2 + g = gen() + id2 = id(g) + + t = threading.Thread(target=run) + t.start() + t.join() + + # Each thread should get a separate reused instance + assert id1 != id2 From 2b0e4e86eed8a002fbd0284df7d67af6c0823292 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 11:26:56 +0100 Subject: [PATCH 09/15] Clear attribute definedness bitmaps --- mypyc/codegen/emitclass.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 99e935ccb7b8..1a3b31695be1 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -824,7 +824,7 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") emitter.emit_line(f"{prefix}_free_instance = self;") - # TODO: emit_clear_bitmaps(cl, emitter) + emit_clear_bitmaps(cl, emitter) for base in reversed(cl.base_mro): for attr, rtype in base.attributes.items(): @@ -834,6 +834,13 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: emitter.emit_line("}") +def emit_clear_bitmaps(cl: ClassIR, emitter: Emitter) -> None: + """Emit C code to clear bitmaps that track if attributes have an assigned value.""" + for i in range(0, len(cl.bitmap_attrs), BITMAP_BITS): + field = emitter.bitmap_field(i) + emitter.emit_line(f"self->{field} = 0;") + + def generate_finalize_for_class( del_method: FuncIR, finalize_func_name: str, emitter: Emitter ) -> None: From 4ad5e295de7a820c02d2d0de36339e3cbfdc6a04 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 12:13:09 +0100 Subject: [PATCH 10/15] Test undefined attribute --- mypyc/test-data/run-generators.test | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/mypyc/test-data/run-generators.test b/mypyc/test-data/run-generators.test index b3077b1149a8..9c0b51d58e79 100644 --- a/mypyc/test-data/run-generators.test +++ b/mypyc/test-data/run-generators.test @@ -798,3 +798,34 @@ def test_each_thread_gets_separate_instance() -> None: # Each thread should get a separate reused instance assert id1 != id2 + +[case testGeneratorWithUndefinedLocalInEnvironment] +from typing import Iterator + +from testutil import assertRaises + +def gen(set: bool) -> Iterator[float]: + if set: + y = float("-113.0") + yield 1.0 + yield y + +def test_bitmap_is_cleared_when_object_is_reused() -> None: + # This updates the bitmap of the shared instance. + list(gen(True)) + + # Ensure bitmap has been cleared. + with assertRaises(AttributeError): # TODO: Should be UnboundLocalError + list(gen(False)) + +def gen2(set: bool) -> Iterator[int]: + if set: + y = int("5") + yield 1 + yield y + +def test_undefined_int_in_environment() -> None: + list(gen2(True)) + + with assertRaises(AttributeError): # TODO: Should be UnboundLocalError + list(gen2(False)) From c2a823d4eff20b2b8b6e284736d8cc5c379c4fef Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 13:38:02 +0100 Subject: [PATCH 11/15] Initialize attribute default values + some refactoring --- mypyc/codegen/emitclass.py | 49 +++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 1a3b31695be1..5610eac96c2b 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -193,7 +193,9 @@ def generate_class_reuse( context = c_emitter.context name = cl.name_prefix(c_emitter.names) + "_free_instance" struct_name = cl.struct_name(c_emitter.names) - context.declarations[name] = HeaderDeclaration(f"CPyThreadLocal {struct_name} *{name};", needs_export=True) + context.declarations[name] = HeaderDeclaration( + f"CPyThreadLocal {struct_name} *{name};", needs_export=True + ) def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None: @@ -572,11 +574,14 @@ def generate_setup_for_class( prefix = cl.name_prefix(emitter.names) if cl.reuse_freed_instance: + # Attempt to use a per-type free list first (a free "list" with up to one object only). emitter.emit_line(f"if ({prefix}_free_instance != NULL) {{") emitter.emit_line(f"self = {prefix}_free_instance;") emitter.emit_line(f"{prefix}_free_instance = NULL;") emitter.emit_line("Py_SET_REFCNT(self, 1);") emitter.emit_line("PyObject_GC_Track(self);") + if defaults_fn is not None: + emit_attr_defaults_func_call(defaults_fn, "self", emitter) emitter.emit_line("return (PyObject *)self;") emitter.emit_line("}") @@ -593,9 +598,7 @@ def generate_setup_for_class( else: emitter.emit_line(f"self->vtable = {vtable_name};") - for i in range(0, len(cl.bitmap_attrs), BITMAP_BITS): - field = emitter.bitmap_field(i) - emitter.emit_line(f"self->{field} = 0;") + emit_clear_bitmaps(cl, emitter) if cl.has_method("__call__"): name = cl.method_decl("__call__").cname(emitter.names) @@ -612,19 +615,34 @@ def generate_setup_for_class( # Initialize attributes to default values, if necessary if defaults_fn is not None: - emitter.emit_lines( - "if ({}{}((PyObject *)self) == 0) {{".format( - NATIVE_PREFIX, defaults_fn.cname(emitter.names) - ), - "Py_DECREF(self);", - "return NULL;", - "}", - ) + emit_attr_defaults_func_call(defaults_fn, "self", emitter) emitter.emit_line("return (PyObject *)self;") emitter.emit_line("}") +def emit_clear_bitmaps(cl: ClassIR, emitter: Emitter) -> None: + """Emit C code to clear bitmaps that track if attributes have an assigned value.""" + for i in range(0, len(cl.bitmap_attrs), BITMAP_BITS): + field = emitter.bitmap_field(i) + emitter.emit_line(f"self->{field} = 0;") + + +def emit_attr_defaults_func_call(defaults_fn: FuncIR, self_name: str, emitter: Emitter) -> None: + """Emit C code to initialize attribute defaults by calling defaults_fn. + + The code returns NULL on a raised exception. + """ + emitter.emit_lines( + "if ({}{}((PyObject *){}) == 0) {{".format( + NATIVE_PREFIX, defaults_fn.cname(emitter.names), self_name + ), + "Py_DECREF(self);", + "return NULL;", + "}", + ) + + def generate_constructor_for_class( cl: ClassIR, fn: FuncDecl, @@ -834,13 +852,6 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: emitter.emit_line("}") -def emit_clear_bitmaps(cl: ClassIR, emitter: Emitter) -> None: - """Emit C code to clear bitmaps that track if attributes have an assigned value.""" - for i in range(0, len(cl.bitmap_attrs), BITMAP_BITS): - field = emitter.bitmap_field(i) - emitter.emit_line(f"self->{field} = 0;") - - def generate_finalize_for_class( del_method: FuncIR, finalize_func_name: str, emitter: Emitter ) -> None: From 3460ce5f6a425120b0d35cd39ffd2b80ab17aa34 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 13:50:14 +0100 Subject: [PATCH 12/15] Update docstrings and comments --- mypyc/codegen/emitclass.py | 17 +++++++++++++++++ mypyc/ir/class_ir.py | 3 ++- mypyc/lib-rt/mypyc_util.h | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 5610eac96c2b..576787424cbf 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -189,7 +189,18 @@ def generate_class_type_decl( def generate_class_reuse( cl: ClassIR, c_emitter: Emitter, external_emitter: Emitter, emitter: Emitter ) -> None: + """Generate a definition of a single-object per-class free "list". + + This speeds up object allocation and freeing when there are many short-lived + objects. + + TODO: Generalize to support a free list with up to N objects. + """ assert cl.reuse_freed_instance + + # The free list implementation doesn't support class hierarchies + assert cl.is_final_class or cl.children == [] + context = c_emitter.context name = cl.name_prefix(c_emitter.names) + "_free_instance" struct_name = cl.struct_name(c_emitter.names) @@ -838,10 +849,16 @@ def generate_dealloc_for_class( def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: + """Emit code to deallocate object by putting it to per-type free list. + + The free "list" currently can have up to one object. + """ prefix = cl.name_prefix(emitter.names) emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") emitter.emit_line(f"{prefix}_free_instance = self;") + # Clear attributes and free referenced objects. + emit_clear_bitmaps(cl, emitter) for base in reversed(cl.base_mro): diff --git a/mypyc/ir/class_ir.py b/mypyc/ir/class_ir.py index f813294235bc..561dc9d438c4 100644 --- a/mypyc/ir/class_ir.py +++ b/mypyc/ir/class_ir.py @@ -206,7 +206,8 @@ def __init__( # If True, keep one freed, cleared instance available for immediate reuse to # speed up allocations. This helps if many objects are freed quickly, before - # other instances of the same class are allocated. + # other instances of the same class are allocated. This is effectively a + # per-type free "list" of up to length 1. self.reuse_freed_instance = False def __repr__(self) -> str: diff --git a/mypyc/lib-rt/mypyc_util.h b/mypyc/lib-rt/mypyc_util.h index 01154824c8f1..127d70a49091 100644 --- a/mypyc/lib-rt/mypyc_util.h +++ b/mypyc/lib-rt/mypyc_util.h @@ -38,7 +38,7 @@ #elif defined(_MSC_VER) #define CPyThreadLocal __declspec(thread) -// 3. GNU thread local storage for GCC/Clang targets that still need it +// 3. GNU thread local storage for GCC/Clang targets that still need it #elif defined(__GNUC__) || defined(__clang__) #define CPyThreadLocal __thread From 06e9378f708f31a4eb92ee44b83c351a49849c91 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 17:57:30 +0100 Subject: [PATCH 13/15] Improve robustness on free threaded builds and add a TODO --- mypyc/codegen/emitclass.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index 576787424cbf..fb6dbc9ecd7c 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -855,7 +855,6 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: """ prefix = cl.name_prefix(emitter.names) emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") - emitter.emit_line(f"{prefix}_free_instance = self;") # Clear attributes and free referenced objects. @@ -865,6 +864,11 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: for attr, rtype in base.attributes.items(): emitter.emit_reuse_clear(f"self->{emitter.attr(attr)}", rtype) + # TODO: Insert a memory barrier on free-threaded builds? This appears not to be + # needed on x86-64 because of the memory model. + + emitter.emit_line(f"{prefix}_free_instance = self;") + emitter.emit_line("return;") emitter.emit_line("}") From e89504e83d28c1ea620c95970bf8f9287c15f092 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Fri, 20 Jun 2025 18:45:26 +0100 Subject: [PATCH 14/15] Revert "Improve robustness on free threaded builds and add a TODO" This reverts commit 06e9378f708f31a4eb92ee44b83c351a49849c91. --- mypyc/codegen/emitclass.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index fb6dbc9ecd7c..576787424cbf 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -855,6 +855,7 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: """ prefix = cl.name_prefix(emitter.names) emitter.emit_line(f"if ({prefix}_free_instance == NULL) {{") + emitter.emit_line(f"{prefix}_free_instance = self;") # Clear attributes and free referenced objects. @@ -864,11 +865,6 @@ def emit_reuse_dealloc(cl: ClassIR, emitter: Emitter) -> None: for attr, rtype in base.attributes.items(): emitter.emit_reuse_clear(f"self->{emitter.attr(attr)}", rtype) - # TODO: Insert a memory barrier on free-threaded builds? This appears not to be - # needed on x86-64 because of the memory model. - - emitter.emit_line(f"{prefix}_free_instance = self;") - emitter.emit_line("return;") emitter.emit_line("}") From 57e2bc27718790d71f7429e5f31a072c01ac5fe1 Mon Sep 17 00:00:00 2001 From: Jukka Lehtosalo Date: Thu, 3 Jul 2025 11:21:11 +0100 Subject: [PATCH 15/15] Address review --- mypyc/lib-rt/mypyc_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/lib-rt/mypyc_util.h b/mypyc/lib-rt/mypyc_util.h index 127d70a49091..3d4eba3a3cdb 100644 --- a/mypyc/lib-rt/mypyc_util.h +++ b/mypyc/lib-rt/mypyc_util.h @@ -43,7 +43,7 @@ #define CPyThreadLocal __thread #else -#error "Cannot define CPyThreadLocal for this compiler/target" +#error "Can't define CPyThreadLocal for this compiler/target (consider using a non-free-threaded Python build)" #endif #endif // Py_GIL_DISABLED