Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions src/msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,7 @@ typedef struct {
#endif
PyObject *astimezone;
PyObject *re_compile;
PyObject *copy_deepcopy;
uint8_t gc_cycle;
} MsgspecState;

Expand Down Expand Up @@ -7938,6 +7939,66 @@ Struct_copy(PyObject *self, PyObject *args)
return NULL;
}


static PyObject* get_deepcopy_func() {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying a new style of handling these kinds of imports here. We initialize the field on the module state as NULL, and import on an as-needed basis. I think this is a fairly reasonable compromise between performance and complexity. After the first import, it's just one additional x == NULL check, which should be negligible in terms of performance.

The only real downside I can see is that you now have to remember to use the "getter function" and cannot rely on the module state.

@ofek lmk what you think about this.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy is a very cheap import (~200us on my machine), and is used by so many modules in the stdlib and downstream that delaying its import doesn't feel worth it (for example, the python repl in 3.13 will import it on startup). Maybe a useful pattern for other modules, but I'd just import this one at startup and avoid the complexity.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we should not lazily import this module; however, I think your implementation would be extremely useful for basically everything else. If you wouldn't mind, could you try it or document the pattern somewhere for a future PR?

Copy link
Contributor Author

@provinzkraut provinzkraut Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you wouldn't mind trying or documenting the pattern somewhere for a future PR?

Yeah, that was my goal. I wanted to use this opportunity to test out this pattern. I was planning to make a follow-up PR using this for the re module, where I think it might be worth it. I'll move this implementation over there.

for basically everything else

I think the typing module will also always be imported no matter what, so it wouldn't be as useful there.

// Lazily import copy.deepcopy and cache it in the global module state.
// Returns the cached callable, or NULL if importing `copy` or looking up
// its `deepcopy` attribute fails.
PyObject *copy_mod, *deepcopy_func;
MsgspecState* st = msgspec_get_global_state();
deepcopy_func = st->copy_deepcopy;
if (deepcopy_func == NULL) {
// Cache miss: import the `copy` module and look up `deepcopy` on it.
copy_mod = PyImport_ImportModule("copy");
if (copy_mod == NULL) return NULL;
deepcopy_func = PyObject_GetAttrString(copy_mod, "deepcopy");
// The lookup result (NULL on failure) is stored in the module state.
st->copy_deepcopy = deepcopy_func;
Py_DECREF(copy_mod);
if (st->copy_deepcopy == NULL) return NULL;
}

// No INCREF is performed here: the returned pointer is the same reference
// that is stored in the module state, so callers must not DECREF it.
return deepcopy_func;
}

/*
 * Struct.__deepcopy__(memo)
 *
 * Allocates a new instance of type(self) and fills every field with the
 * result of calling the cached `copy.deepcopy` callable as
 * `deepcopy(field_value, memo)`.
 *
 * args: a 1-tuple holding the memo dict (anything other than a dict is
 *       rejected by the argument parser).
 *
 * Returns a new reference to the copy, or NULL with an exception set when
 * argument parsing, resolving `copy.deepcopy`, allocation, a field read, or
 * a field copy fails.
 */
static PyObject *
Struct_deepcopy(PyObject *self, PyObject *args)
{
    PyObject *memo;
    PyObject *val = NULL, *res = NULL, *dc_val = NULL;
    PyObject *deepcopy_func;
    Py_ssize_t i, nfields;

    if (!PyArg_ParseTuple(args, "O!:__deepcopy__", &PyDict_Type, &memo))
        return NULL;

    /* get_deepcopy_func() returns NULL when importing `copy` or looking up
     * `deepcopy` fails. Bail out before allocating anything so a NULL
     * callable is never passed to PyObject_CallFunctionObjArgs. The pointer
     * is the reference cached in the module state, so it is not DECREF'd
     * here. */
    deepcopy_func = get_deepcopy_func();
    if (deepcopy_func == NULL)
        return NULL;

    res = Struct_alloc(Py_TYPE(self));
    if (res == NULL)
        return NULL;

    nfields = StructMeta_GET_NFIELDS(Py_TYPE(self));
    for (i = 0; i < nfields; i++) {
        /* NOTE(review): val is never DECREF'd in this function, so
         * Struct_get_index presumably returns a borrowed reference -- confirm
         * against its definition. */
        val = Struct_get_index(self, i);
        if (val == NULL)
            goto error;

        dc_val = PyObject_CallFunctionObjArgs(deepcopy_func, val, memo, NULL);
        if (dc_val == NULL)
            goto error;

        /* NOTE(review): dc_val is a new reference that is not released here,
         * so Struct_set_index presumably takes ownership of it -- confirm. */
        Struct_set_index(res, i, dc_val);
    }

    /* If self is tracked, then copy is tracked */
    if (MS_OBJECT_IS_GC(self) && MS_IS_TRACKED(self))
        PyObject_GC_Track(res);

    return res;

error:
    Py_DECREF(res);
    return NULL;
}


static PyObject *
Struct_replace(
PyObject *self,
Expand Down Expand Up @@ -8002,6 +8063,8 @@ Struct_replace(
}
}

if (Struct_post_init(struct_type, out) < 0) goto error;

if (is_gc && !should_untrack) {
PyObject_GC_Track(out);
}
Expand Down Expand Up @@ -8358,6 +8421,7 @@ StructMixin_config(StructMetaObject *self, void *closure) {

static PyMethodDef Struct_methods[] = {
{"__copy__", Struct_copy, METH_NOARGS, "copy a struct"},
{"__deepcopy__", Struct_deepcopy, METH_VARARGS, "deepcopy a struct"},
{"__replace__", (PyCFunction) Struct_replace, METH_FASTCALL | METH_KEYWORDS, "create a new struct with replacements" },
{"__reduce__", Struct_reduce, METH_NOARGS, "reduce a struct"},
{"__rich_repr__", Struct_rich_repr, METH_NOARGS, "rich repr"},
Expand Down Expand Up @@ -22306,6 +22370,7 @@ msgspec_clear(PyObject *m)
#endif
Py_CLEAR(st->astimezone);
Py_CLEAR(st->re_compile);
Py_CLEAR(st->copy_deepcopy);
return 0;
}

Expand Down Expand Up @@ -22380,6 +22445,7 @@ msgspec_traverse(PyObject *m, visitproc visit, void *arg)
#endif
Py_VISIT(st->astimezone);
Py_VISIT(st->re_compile);
Py_VISIT(st->copy_deepcopy);
return 0;
}

Expand Down Expand Up @@ -22674,6 +22740,9 @@ PyInit__core(void)
Py_DECREF(temp_module);
if (st->re_compile == NULL) return NULL;

// cache for 'copy.deepcopy'. to access this function, use 'get_deepcopy_func'
st->copy_deepcopy = NULL;

/* Initialize cached constant strings */
#define CACHED_STRING(attr, str) \
if ((st->attr = PyUnicode_InternFromString(str)) == NULL) return NULL
Expand Down
73 changes: 70 additions & 3 deletions tests/unit/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,12 +810,65 @@ class Test(Struct):
b: int
a: int

x = copy.copy(Test(1, 2))
o = Test(1, 2)
x = copy.copy(o)
assert type(x) is Test
assert x is not o
assert x.b == 1
assert x.a == 2


def test_struct_deepcopy():
    # A field-less Struct must still produce a distinct instance. Copy `o`
    # itself (not a fresh Struct()) so the identity check below is meaningful.
    o = Struct()
    x = copy.deepcopy(o)
    assert type(x) is Struct
    assert x is not o

    class Sub(Struct):
        one: str
        two: list[int]

    class Test(Struct):
        a: int
        b: int
        c: list[str]
        sub: Sub

    o = Test(
        a=1,
        b=2,
        c=["1", "2"],
        sub=Sub(one="hello", two=[3]),
    )
    x = copy.deepcopy(o)
    assert type(x) is Test
    assert x is not o
    # Scalar fields keep their values.
    assert x.a == 1
    assert x.b == 2
    # Mutable containers are equal in value but are new objects.
    assert x.c == ["1", "2"]
    assert x.c is not o.c
    # Nested structs, and the containers inside them, are copied as well.
    assert x.sub is not o.sub
    assert x.sub.one == "hello"
    assert x.sub.two == [3]
    assert x.sub.two is not o.sub.two


def test_struct_deepcopy_custom_impl():
    # A field value that defines its own __deepcopy__ must have that hook
    # used when the enclosing Struct is deep-copied.
    class CustomThing:
        def __init__(self, value):
            self.value = value

        def __deepcopy__(self, memo):
            # Bump the value so a copy made through this hook is
            # distinguishable from the original.
            bumped = self.value + 1
            return CustomThing(value=bumped)

    class TestWithCustom(Struct):
        custom: CustomThing

    original = TestWithCustom(CustomThing(1))
    duplicate = copy.deepcopy(original)
    assert duplicate.custom.value == 2


class FrozenPoint(Struct, frozen=True):
x: int
y: int
Expand Down Expand Up @@ -2664,7 +2717,7 @@ def __post_init__(self):
assert x1 == x2
assert count == 1

def test_post_init_not_called_on_replace(self):
def test_post_init_not_called_on_deepcopy(self):
count = 0

class Ex(Struct):
Expand All @@ -2674,6 +2727,20 @@ def __post_init__(self):

x1 = Ex()
assert count == 1
x2 = msgspec.structs.replace(x1)
x2 = copy.deepcopy(x1)
assert x1 == x2
assert count == 1

def test_post_init_called_on_replace(self, replace):
    # __post_init__ must run once on construction and once more when the
    # instance is passed through `replace`.
    calls = 0

    class Ex(Struct):
        def __post_init__(self):
            nonlocal calls
            calls += 1

    original = Ex()
    assert calls == 1

    replaced = replace(original)
    assert original == replaced
    assert calls == 2