Skip to content

Commit c32b0a5

Browse files
p-sawickibrianschubertpre-commit-ci[bot]
authored
[mypyc] Use defined __new__ method in tp_new and constructor (#19739)
Fixes #16012. mypyc ignored custom implementations of `__new__` because, even though a C function representing the method was generated, it was called neither in the type constructor nor in the method assigned to the `tp_new` pointer. Now, if there's a `__new__` method defined for a type, the corresponding function is called in place of the setup function, which is responsible for allocating memory for new objects and initializing their attributes to default values. The setup function is still called when creating instances of the type, as calls resolving to `object.__new__()` are transformed to call the setup function. This way, `__new__` can return instances of other types, and instances of the type of the class where `__new__` is defined are set up correctly. There are a few limitations: - Programs with `super().__new__()` calls in `__new__` methods of non-native classes are rejected because it's more difficult to resolve the setup function for non-native classes, but this could probably be supported in the future. - Similarly, programs are rejected when a class inherits from a non-compiled class. In this case, calling the `tp_new` method of the parent type results in an error because CPython expects the subtype to use a wrapper for `tp_new`, which compiled classes don't. Allowing this would require compiled types to be initialized more closely to the way CPython does it, which might need a lot of work. - Lastly, when `__new__` is annotated with `@classmethod`, calling it without the type parameter works in compiled code but raises an error in interpreted code. I'm not sure of the reason, and it's difficult to make it a compiler error because it's outside of what mypyc sees. --------- Co-authored-by: Brian Schubert <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f83ec9f commit c32b0a5

File tree

12 files changed

+677
-46
lines changed

12 files changed

+677
-46
lines changed

mypyc/analysis/attrdefined.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ def analyze_always_defined_attrs_in_class(cl: ClassIR, seen: set[ClassIR]) -> No
138138
or cl.builtin_base is not None
139139
or cl.children is None
140140
or cl.is_serializable()
141+
or cl.has_method("__new__")
141142
):
142143
# Give up -- we can't enforce that attributes are always defined.
143144
return

mypyc/codegen/emitclass.py

Lines changed: 68 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from collections.abc import Mapping
66
from typing import Callable
77

8+
from mypy.nodes import ARG_STAR, ARG_STAR2
89
from mypyc.codegen.cstring import c_string_initializer
910
from mypyc.codegen.emit import Emitter, HeaderDeclaration, ReturnHandler
1011
from mypyc.codegen.emitfunc import native_function_doc_initializer, native_function_header
@@ -224,7 +225,7 @@ def generate_class(cl: ClassIR, module: str, emitter: Emitter) -> None:
224225
name = cl.name
225226
name_prefix = cl.name_prefix(emitter.names)
226227

227-
setup_name = f"{name_prefix}_setup"
228+
setup_name = emitter.native_function_name(cl.setup)
228229
new_name = f"{name_prefix}_new"
229230
finalize_name = f"{name_prefix}_finalize"
230231
members_name = f"{name_prefix}_members"
@@ -317,10 +318,8 @@ def emit_line() -> None:
317318
fields["tp_basicsize"] = base_size
318319

319320
if generate_full:
320-
# Declare setup method that allocates and initializes an object. type is the
321-
# type of the class being initialized, which could be another class if there
322-
# is an interpreted subclass.
323-
emitter.emit_line(f"static PyObject *{setup_name}(PyTypeObject *type);")
321+
assert cl.setup is not None
322+
emitter.emit_line(native_function_header(cl.setup, emitter) + ";")
324323
assert cl.ctor is not None
325324
emitter.emit_line(native_function_header(cl.ctor, emitter) + ";")
326325

@@ -390,9 +389,7 @@ def emit_line() -> None:
390389

391390
emitter.emit_line()
392391
if generate_full:
393-
generate_setup_for_class(
394-
cl, setup_name, defaults_fn, vtable_name, shadow_vtable_name, emitter
395-
)
392+
generate_setup_for_class(cl, defaults_fn, vtable_name, shadow_vtable_name, emitter)
396393
emitter.emit_line()
397394
generate_constructor_for_class(cl, cl.ctor, init_fn, setup_name, vtable_name, emitter)
398395
emitter.emit_line()
@@ -579,16 +576,16 @@ def generate_vtable(
579576

580577
def generate_setup_for_class(
581578
cl: ClassIR,
582-
func_name: str,
583579
defaults_fn: FuncIR | None,
584580
vtable_name: str,
585581
shadow_vtable_name: str | None,
586582
emitter: Emitter,
587583
) -> None:
588584
"""Generate a native function that allocates an instance of a class."""
589-
emitter.emit_line("static PyObject *")
590-
emitter.emit_line(f"{func_name}(PyTypeObject *type)")
585+
emitter.emit_line(native_function_header(cl.setup, emitter))
591586
emitter.emit_line("{")
587+
type_arg_name = REG_PREFIX + cl.setup.sig.args[0].name
588+
emitter.emit_line(f"PyTypeObject *type = (PyTypeObject*){type_arg_name};")
592589
struct_name = cl.struct_name(emitter.names)
593590
emitter.emit_line(f"{struct_name} *self;")
594591

@@ -663,6 +660,35 @@ def emit_attr_defaults_func_call(defaults_fn: FuncIR, self_name: str, emitter: E
663660
)
664661

665662

663+
def emit_setup_or_dunder_new_call(
    cl: ClassIR,
    setup_name: str,
    type_arg: str,
    native_prefix: bool,
    new_args: str,
    emitter: Emitter,
) -> None:
    """Emit C code that creates 'self' via the setup function or a defined __new__.

    If the class has no __new__ method, the setup function is called with
    'type_arg' as its only argument. Otherwise the C function generated for
    __new__ is called with 'type_arg' followed by 'new_args'.

    In both cases a NULL check that returns NULL is emitted after the call.
    When __new__ is used, an additional check is emitted that returns 'self'
    right away if its type is not exactly this class.

    Args:
        cl: Class whose instance is being created.
        setup_name: C name of the setup function of the class.
        type_arg: C expression for the type object passed as the first argument.
        native_prefix: If True, call the __new__ function named with the group
            prefix and NATIVE_PREFIX; if False, call the one named with PREFIX.
        new_args: Comma-separated C arguments passed to __new__ after
            'type_arg'. May be empty.
        emitter: Emitter that receives the generated lines.
    """
    dunder_new = cl.get_method("__new__")
    if dunder_new:
        # The group prefix is only combined with the native name, never with PREFIX.
        if native_prefix:
            fn_prefix = emitter.get_group_prefix(dunder_new.decl) + NATIVE_PREFIX
        else:
            fn_prefix = PREFIX
        callee = fn_prefix + dunder_new.cname(emitter.names)
        call_args = type_arg if new_args == "" else f"{type_arg}, {new_args}"
    else:
        callee = setup_name
        call_args = type_arg

    emitter.emit_lines(
        f"PyObject *self = {callee}({call_args});", "if (self == NULL)", " return NULL;"
    )

    if dunder_new:
        # Skip __init__ if __new__ returns some other type: the generated code
        # returns before whatever the caller emits next.
        emitter.emit_lines(
            f"if (Py_TYPE(self) != {emitter.type_struct_name(cl)})", " return self;"
        )
690+
691+
666692
def generate_constructor_for_class(
667693
cl: ClassIR,
668694
fn: FuncDecl,
@@ -674,17 +700,30 @@ def generate_constructor_for_class(
674700
"""Generate a native function that allocates and initializes an instance of a class."""
675701
emitter.emit_line(f"{native_function_header(fn, emitter)}")
676702
emitter.emit_line("{")
677-
emitter.emit_line(f"PyObject *self = {setup_name}({emitter.type_struct_name(cl)});")
678-
emitter.emit_line("if (self == NULL)")
679-
emitter.emit_line(" return NULL;")
680-
args = ", ".join(["self"] + [REG_PREFIX + arg.name for arg in fn.sig.args])
703+
704+
fn_args = [REG_PREFIX + arg.name for arg in fn.sig.args]
705+
type_arg = "(PyObject *)" + emitter.type_struct_name(cl)
706+
new_args = ", ".join(fn_args)
707+
708+
use_wrapper = (
709+
cl.has_method("__new__")
710+
and len(fn.sig.args) == 2
711+
and fn.sig.args[0].kind == ARG_STAR
712+
and fn.sig.args[1].kind == ARG_STAR2
713+
)
714+
emit_setup_or_dunder_new_call(cl, setup_name, type_arg, not use_wrapper, new_args, emitter)
715+
716+
args = ", ".join(["self"] + fn_args)
681717
if init_fn is not None:
718+
prefix = PREFIX if use_wrapper else NATIVE_PREFIX
719+
cast = "!= NULL ? 0 : -1" if use_wrapper else ""
682720
emitter.emit_line(
683-
"char res = {}{}{}({});".format(
721+
"char res = {}{}{}({}){};".format(
684722
emitter.get_group_prefix(init_fn.decl),
685-
NATIVE_PREFIX,
723+
prefix,
686724
init_fn.cname(emitter.names),
687725
args,
726+
cast,
688727
)
689728
)
690729
emitter.emit_line("if (res == 2) {")
@@ -717,7 +756,7 @@ def generate_init_for_class(cl: ClassIR, init_fn: FuncIR, emitter: Emitter) -> s
717756
emitter.emit_line("static int")
718757
emitter.emit_line(f"{func_name}(PyObject *self, PyObject *args, PyObject *kwds)")
719758
emitter.emit_line("{")
720-
if cl.allow_interpreted_subclasses or cl.builtin_base:
759+
if cl.allow_interpreted_subclasses or cl.builtin_base or cl.has_method("__new__"):
721760
emitter.emit_line(
722761
"return {}{}(self, args, kwds) != NULL ? 0 : -1;".format(
723762
PREFIX, init_fn.cname(emitter.names)
@@ -750,15 +789,22 @@ def generate_new_for_class(
750789
emitter.emit_line("return NULL;")
751790
emitter.emit_line("}")
752791

753-
if not init_fn or cl.allow_interpreted_subclasses or cl.builtin_base or cl.is_serializable():
792+
type_arg = "(PyObject*)type"
793+
new_args = "args, kwds"
794+
emit_setup_or_dunder_new_call(cl, setup_name, type_arg, False, new_args, emitter)
795+
if (
796+
not init_fn
797+
or cl.allow_interpreted_subclasses
798+
or cl.builtin_base
799+
or cl.is_serializable()
800+
or cl.has_method("__new__")
801+
):
754802
# Match Python semantics -- __new__ doesn't call __init__.
755-
emitter.emit_line(f"return {setup_name}(type);")
803+
emitter.emit_line("return self;")
756804
else:
757805
# __new__ of a native class implicitly calls __init__ so that we
758806
# can enforce that instances are always properly initialized. This
759807
# is needed to support always defined attributes.
760-
emitter.emit_line(f"PyObject *self = {setup_name}(type);")
761-
emitter.emit_lines("if (self == NULL)", " return NULL;")
762808
emitter.emit_line(
763809
f"PyObject *ret = {PREFIX}{init_fn.cname(emitter.names)}(self, args, kwds);"
764810
)

mypyc/codegen/emitmodule.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,8 +1274,8 @@ def is_fastcall_supported(fn: FuncIR, capi_version: tuple[int, int]) -> bool:
12741274
if fn.name == "__call__":
12751275
# We can use vectorcalls (PEP 590) when supported
12761276
return True
1277-
# TODO: Support fastcall for __init__.
1278-
return fn.name != "__init__"
1277+
# TODO: Support fastcall for __init__ and __new__.
1278+
return fn.name != "__init__" and fn.name != "__new__"
12791279
return True
12801280

12811281

mypyc/codegen/emitwrapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ def generate_legacy_wrapper_function(
238238
real_args = list(fn.args)
239239
if fn.sig.num_bitmap_args:
240240
real_args = real_args[: -fn.sig.num_bitmap_args]
241-
if fn.class_name and fn.decl.kind != FUNC_STATICMETHOD:
241+
if fn.class_name and (fn.decl.name == "__new__" or fn.decl.kind != FUNC_STATICMETHOD):
242242
arg = real_args.pop(0)
243243
emitter.emit_line(f"PyObject *obj_{arg.name} = self;")
244244

mypyc/ir/class_ir.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
from typing import NamedTuple
66

77
from mypyc.common import PROPSET_PREFIX, JsonDict
8-
from mypyc.ir.func_ir import FuncDecl, FuncIR, FuncSignature
8+
from mypyc.ir.func_ir import FuncDecl, FuncIR, FuncSignature, RuntimeArg
99
from mypyc.ir.ops import DeserMaps, Value
10-
from mypyc.ir.rtypes import RInstance, RType, deserialize_type
10+
from mypyc.ir.rtypes import RInstance, RType, deserialize_type, object_rprimitive
1111
from mypyc.namegen import NameGenerator, exported_name
1212

1313
# Some notes on the vtable layout: Each concrete class has a vtable
@@ -133,6 +133,16 @@ def __init__(
133133
self.builtin_base: str | None = None
134134
# Default empty constructor
135135
self.ctor = FuncDecl(name, None, module_name, FuncSignature([], RInstance(self)))
136+
# Declare setup method that allocates and initializes an object. type is the
137+
# type of the class being initialized, which could be another class if there
138+
# is an interpreted subclass.
139+
# TODO: Make it a regular method and generate its body in IR
140+
self.setup = FuncDecl(
141+
"__mypyc__" + name + "_setup",
142+
None,
143+
module_name,
144+
FuncSignature([RuntimeArg("type", object_rprimitive)], RInstance(self)),
145+
)
136146
# Attributes defined in the class (not inherited)
137147
self.attributes: dict[str, RType] = {}
138148
# Deletable attributes

mypyc/irbuild/expression.py

Lines changed: 31 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from mypyc.ir.ops import (
5858
Assign,
5959
BasicBlock,
60+
Call,
6061
ComparisonOp,
6162
Integer,
6263
LoadAddress,
@@ -472,23 +473,42 @@ def translate_super_method_call(builder: IRBuilder, expr: CallExpr, callee: Supe
472473
if callee.name in base.method_decls:
473474
break
474475
else:
475-
if (
476-
ir.is_ext_class
477-
and ir.builtin_base is None
478-
and not ir.inherits_python
479-
and callee.name == "__init__"
480-
and len(expr.args) == 0
481-
):
482-
# Call translates to object.__init__(self), which is a
483-
# no-op, so omit the call.
484-
return builder.none()
476+
if ir.is_ext_class and ir.builtin_base is None and not ir.inherits_python:
477+
if callee.name == "__init__" and len(expr.args) == 0:
478+
# Call translates to object.__init__(self), which is a
479+
# no-op, so omit the call.
480+
return builder.none()
481+
elif callee.name == "__new__":
482+
# object.__new__(cls)
483+
assert (
484+
len(expr.args) == 1
485+
), f"Expected object.__new__() call to have exactly 1 argument, got {len(expr.args)}"
486+
typ_arg = expr.args[0]
487+
method_args = builder.fn_info.fitem.arg_names
488+
if (
489+
isinstance(typ_arg, NameExpr)
490+
and len(method_args) > 0
491+
and method_args[0] == typ_arg.name
492+
):
493+
subtype = builder.accept(expr.args[0])
494+
return builder.add(Call(ir.setup, [subtype], expr.line))
495+
496+
if callee.name == "__new__":
497+
call = "super().__new__()"
498+
if not ir.is_ext_class:
499+
builder.error(f"{call} not supported for non-extension classes", expr.line)
500+
if ir.inherits_python:
501+
builder.error(
502+
f"{call} not supported for classes inheriting from non-native classes",
503+
expr.line,
504+
)
485505
return translate_call(builder, expr, callee)
486506

487507
decl = base.method_decl(callee.name)
488508
arg_values = [builder.accept(arg) for arg in expr.args]
489509
arg_kinds, arg_names = expr.arg_kinds.copy(), expr.arg_names.copy()
490510

491-
if decl.kind != FUNC_STATICMETHOD:
511+
if decl.kind != FUNC_STATICMETHOD and decl.name != "__new__":
492512
# Grab first argument
493513
vself: Value = builder.self()
494514
if decl.kind == FUNC_CLASSMETHOD:

mypyc/irbuild/prepare.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,9 @@ def prepare_func_def(
193193
create_generator_class_if_needed(module_name, class_name, fdef, mapper)
194194

195195
kind = (
196-
FUNC_STATICMETHOD
197-
if fdef.is_static
198-
else (FUNC_CLASSMETHOD if fdef.is_class else FUNC_NORMAL)
196+
FUNC_CLASSMETHOD
197+
if fdef.is_class
198+
else (FUNC_STATICMETHOD if fdef.is_static else FUNC_NORMAL)
199199
)
200200
sig = mapper.fdef_to_sig(fdef, options.strict_dunders_typing)
201201
decl = FuncDecl(fdef.name, class_name, module_name, sig, kind)
@@ -555,21 +555,57 @@ def add_setter_declaration(
555555
ir.method_decls[setter_name] = decl
556556

557557

558+
def check_matching_args(init_sig: FuncSignature, new_sig: FuncSignature) -> bool:
    """Return whether two signatures agree on their arguments past the first one.

    Trailing bitmap arguments (counted by 'num_bitmap_args') are ignored. The
    first argument of each signature is not compared (presumably 'self' for
    __init__ and the type argument for __new__). The remaining arguments must
    be equal in number and pairwise equal in both type and kind. Argument
    names are not compared.

    Args:
        init_sig: Signature of the __init__ method.
        new_sig: Signature of the __new__ method.

    Returns:
        True if the compared arguments match, False otherwise.
    """
    num_init_args = len(init_sig.args) - init_sig.num_bitmap_args
    num_new_args = len(new_sig.args) - new_sig.num_bitmap_args
    if num_init_args != num_new_args:
        return False

    # Walk both argument lists in lockstep, skipping the first argument and
    # stopping before the bitmap arguments at the end.
    init_args = init_sig.args[1:num_init_args]
    new_args = new_sig.args[1:num_new_args]
    for init_arg, new_arg in zip(init_args, new_args):
        if init_arg.type != new_arg.type or init_arg.kind != new_arg.kind:
            return False

    return True
574+
575+
558576
def prepare_init_method(cdef: ClassDef, ir: ClassIR, module_name: str, mapper: Mapper) -> None:
559577
# Set up a constructor decl
560578
init_node = cdef.info["__init__"].node
579+
580+
new_node: SymbolNode | None = None
581+
new_symbol = cdef.info.get("__new__")
582+
# We are only interested in __new__ method defined in a user-defined class,
583+
# so we ignore it if it comes from a builtin type. It's usually builtins.object
584+
# but could also be builtins.type for metaclasses so we detect the prefix which
585+
# matches both.
586+
if new_symbol and new_symbol.fullname and not new_symbol.fullname.startswith("builtins."):
587+
new_node = new_symbol.node
588+
if isinstance(new_node, (Decorator, OverloadedFuncDef)):
589+
new_node = get_func_def(new_node)
561590
if not ir.is_trait and not ir.builtin_base and isinstance(init_node, FuncDef):
562591
init_sig = mapper.fdef_to_sig(init_node, True)
592+
args_match = True
593+
if isinstance(new_node, FuncDef):
594+
new_sig = mapper.fdef_to_sig(new_node, True)
595+
args_match = check_matching_args(init_sig, new_sig)
563596

564597
defining_ir = mapper.type_to_ir.get(init_node.info)
565598
# If there is a nontrivial __init__ that wasn't defined in an
566599
# extension class, we need to make the constructor take *args,
567600
# **kwargs so it can call tp_init.
568601
if (
569-
defining_ir is None
570-
or not defining_ir.is_ext_class
571-
or cdef.info["__init__"].plugin_generated
572-
) and init_node.info.fullname != "builtins.object":
602+
(
603+
defining_ir is None
604+
or not defining_ir.is_ext_class
605+
or cdef.info["__init__"].plugin_generated
606+
)
607+
and init_node.info.fullname != "builtins.object"
608+
) or not args_match:
573609
init_sig = FuncSignature(
574610
[
575611
init_sig.args[0],

mypyc/lib-rt/misc_ops.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,17 @@ PyObject *CPyType_FromTemplate(PyObject *template,
227227
if (!name)
228228
goto error;
229229

230+
if (template_->tp_doc) {
231+
// cpython expects tp_doc to be heap-allocated so convert it here to
232+
// avoid segfaults on deallocation.
233+
Py_ssize_t size = strlen(template_->tp_doc) + 1;
234+
char *doc = (char *)PyMem_Malloc(size);
235+
if (!doc)
236+
goto error;
237+
memcpy(doc, template_->tp_doc, size);
238+
template_->tp_doc = doc;
239+
}
240+
230241
// Allocate the type and then copy the main stuff in.
231242
t = (PyHeapTypeObject*)PyType_GenericAlloc(&PyType_Type, 0);
232243
if (!t)

0 commit comments

Comments
 (0)