From a8e850147d67a01c2c94db4c79f260d583b0bdb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 30 Aug 2025 15:56:06 +0200 Subject: [PATCH 1/6] fully implement GC protocol for `bz2` objects --- Modules/_bz2module.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 914172684158a1..372ae0c0cdaa97 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -352,7 +352,7 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) } assert(type != NULL && type->tp_alloc != NULL); - self = (BZ2Compressor *)type->tp_alloc(type, 0); + self = PyObject_GC_New(BZ2Compressor, type); if (self == NULL) { return NULL; } @@ -364,6 +364,8 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) return NULL; } + // explicit fields initialization as PyObject_GC_New() does not change them + self->flushed = 0; self->bzs.opaque = NULL; self->bzs.bzalloc = BZ2_Malloc; self->bzs.bzfree = BZ2_Free; @@ -371,6 +373,7 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) if (catch_bz2_error(bzerror)) goto error; + PyObject_GC_Track((PyObject *)self); return (PyObject *)self; error: @@ -381,12 +384,13 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) static void BZ2Compressor_dealloc(PyObject *op) { + PyTypeObject *tp = Py_TYPE(op); + PyObject_GC_UnTrack(op); BZ2Compressor *self = _BZ2Compressor_CAST(op); BZ2_bzCompressEnd(&self->bzs); if (self->lock != NULL) { PyThread_free_lock(self->lock); } - PyTypeObject *tp = Py_TYPE(self); tp->tp_free((PyObject *)self); Py_DECREF(tp); } @@ -420,7 +424,7 @@ static PyType_Spec bz2_compressor_type_spec = { // bz2_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag // which prevents to create a subclass. // So calling PyType_GetModuleState() in this file is always safe. - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC), .slots = bz2_compressor_type_slots, }; @@ -653,7 +657,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) int bzerror; assert(type != NULL && type->tp_alloc != NULL); - self = (BZ2Decompressor *)type->tp_alloc(type, 0); + self = PyObject_GC_New(BZ2Decompressor, type); if (self == NULL) { return NULL; } @@ -665,6 +669,8 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) return NULL; } + // explicit fields initialization as PyObject_GC_New() does not change them + self->eof = 0; self->needs_input = 1; self->bzs_avail_in_real = 0; self->input_buffer = NULL; @@ -673,10 +679,12 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) if (self->unused_data == NULL) goto error; + self->bzs = (bz_stream){.opaque = NULL, .bzalloc = NULL, .bzfree = NULL}; bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); if (catch_bz2_error(bzerror)) goto error; + PyObject_GC_Track((PyObject *)self); return (PyObject *)self; error: @@ -687,8 +695,9 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) static void BZ2Decompressor_dealloc(PyObject *op) { + PyTypeObject *tp = Py_TYPE(op); + PyObject_GC_UnTrack(op); BZ2Decompressor *self = _BZ2Decompressor_CAST(op); - if(self->input_buffer != NULL) { PyMem_Free(self->input_buffer); } @@ -697,8 +706,6 @@ BZ2Decompressor_dealloc(PyObject *op) if (self->lock != NULL) { PyThread_free_lock(self->lock); } - - PyTypeObject *tp = Py_TYPE(self); tp->tp_free((PyObject *)self); Py_DECREF(tp); } @@ -751,7 +758,7 @@ static PyType_Spec bz2_decompressor_type_spec = { // bz2_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag // which prevents to create a subclass. // So calling PyType_GetModuleState() in this file is always safe. - .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE), + .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC), .slots = bz2_decompressor_type_slots, }; From c9d3ab625007062ec49bfe150a621ca0d8511f7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Aug 2025 11:16:19 +0200 Subject: [PATCH 2/6] remove redundant casts --- Modules/_bz2module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 372ae0c0cdaa97..31bf0bf03d4313 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -373,7 +373,7 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) if (catch_bz2_error(bzerror)) goto error; - PyObject_GC_Track((PyObject *)self); + PyObject_GC_Track(self); return (PyObject *)self; error: @@ -684,7 +684,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) if (catch_bz2_error(bzerror)) goto error; - PyObject_GC_Track((PyObject *)self); + PyObject_GC_Track(self); return (PyObject *)self; error: From 81f77b68143132e12122fa8ddf408d7f71407eca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Aug 2025 12:21:46 +0200 Subject: [PATCH 3/6] define fields post zero-initialization when needed --- Modules/_bz2module.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 31bf0bf03d4313..eba936c7dbdb3e 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -356,6 +356,9 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) if (self == NULL) { return NULL; } + /* Initialize the remaining fields (untouched by PyObject_GC_New()). */ + const size_t offset = sizeof(struct { PyObject_HEAD }); + memset((char *)self + offset, 0, sizeof(*self) - offset); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { @@ -364,8 +367,6 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) return NULL; } - // explicit fields initialization as PyObject_GC_New() does not change them - self->flushed = 0; self->bzs.opaque = NULL; self->bzs.bzalloc = BZ2_Malloc; self->bzs.bzfree = BZ2_Free; @@ -656,11 +657,14 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) BZ2Decompressor *self; int bzerror; - assert(type != NULL && type->tp_alloc != NULL); + assert(type != NULL); self = PyObject_GC_New(BZ2Decompressor, type); if (self == NULL) { return NULL; } + /* Initialize the remaining fields (untouched by PyObject_GC_New()). */ + const size_t offset = sizeof(struct { PyObject_HEAD }); + memset((char *)self + offset, 0, sizeof(*self) - offset); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { @@ -669,17 +673,11 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) return NULL; } - // explicit fields initialization as PyObject_GC_New() does not change them - self->eof = 0; self->needs_input = 1; - self->bzs_avail_in_real = 0; - self->input_buffer = NULL; - self->input_buffer_size = 0; self->unused_data = PyBytes_FromStringAndSize(NULL, 0); if (self->unused_data == NULL) goto error; - self->bzs = (bz_stream){.opaque = NULL, .bzalloc = NULL, .bzfree = NULL}; bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); if (catch_bz2_error(bzerror)) goto error; From 2b50446ad7559bd44732b91f0032ce8b517d2319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Aug 2025 13:59:21 +0200 Subject: [PATCH 4/6] use `PyObject_GC_Del` for clarity --- Modules/_bz2module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index eba936c7dbdb3e..4ded01db312fa9 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -392,7 +392,7 @@ BZ2Compressor_dealloc(PyObject *op) if (self->lock != NULL) { PyThread_free_lock(self->lock); } - tp->tp_free((PyObject *)self); + PyObject_GC_Del(self); Py_DECREF(tp); } @@ -704,7 +704,7 @@ BZ2Decompressor_dealloc(PyObject *op) if (self->lock != NULL) { PyThread_free_lock(self->lock); } - tp->tp_free((PyObject *)self); + PyObject_GC_Del(self); Py_DECREF(tp); } From a9b609685e2b0ecb94661c52ea5c7ee4ffec4edf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Aug 2025 14:18:28 +0200 Subject: [PATCH 5/6] reduce diff for 3.14 and later --- Modules/_bz2module.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 4ded01db312fa9..1c91a9285bbe6b 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -352,13 +352,10 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) } assert(type != NULL && type->tp_alloc != NULL); - self = PyObject_GC_New(BZ2Compressor, type); + self = (BZ2Compressor *)type->tp_alloc(type, 0); if (self == NULL) { return NULL; } - /* Initialize the remaining fields (untouched by PyObject_GC_New()). */ - const size_t offset = sizeof(struct { PyObject_HEAD }); - memset((char *)self + offset, 0, sizeof(*self) - offset); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { @@ -657,14 +654,11 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) BZ2Decompressor *self; int bzerror; - assert(type != NULL); - self = PyObject_GC_New(BZ2Decompressor, type); + assert(type != NULL && type->tp_alloc != NULL); + self = (BZ2Decompressor *)type->tp_alloc(type, 0); if (self == NULL) { return NULL; } - /* Initialize the remaining fields (untouched by PyObject_GC_New()). */ - const size_t offset = sizeof(struct { PyObject_HEAD }); - memset((char *)self + offset, 0, sizeof(*self) - offset); self->lock = PyThread_allocate_lock(); if (self->lock == NULL) { @@ -674,6 +668,9 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) } self->needs_input = 1; + self->bzs_avail_in_real = 0; + self->input_buffer = NULL; + self->input_buffer_size = 0; self->unused_data = PyBytes_FromStringAndSize(NULL, 0); if (self->unused_data == NULL) goto error; From 91a3e215e1c7a0067fd434df7af24640cc9843a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 31 Aug 2025 14:44:47 +0200 Subject: [PATCH 6/6] smash diff --- Modules/_bz2module.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c index 1c91a9285bbe6b..d988901933703e 100644 --- a/Modules/_bz2module.c +++ b/Modules/_bz2module.c @@ -371,7 +371,6 @@ _bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel) if (catch_bz2_error(bzerror)) goto error; - PyObject_GC_Track(self); return (PyObject *)self; error: @@ -389,7 +388,7 @@ BZ2Compressor_dealloc(PyObject *op) if (self->lock != NULL) { PyThread_free_lock(self->lock); } - PyObject_GC_Del(self); + tp->tp_free(self); Py_DECREF(tp); } @@ -679,7 +678,6 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type) if (catch_bz2_error(bzerror)) goto error; - PyObject_GC_Track(self); return (PyObject *)self; error: @@ -693,7 +691,8 @@ BZ2Decompressor_dealloc(PyObject *op) PyTypeObject *tp = Py_TYPE(op); PyObject_GC_UnTrack(op); BZ2Decompressor *self = _BZ2Decompressor_CAST(op); - if(self->input_buffer != NULL) { + + if (self->input_buffer != NULL) { PyMem_Free(self->input_buffer); } BZ2_bzDecompressEnd(&self->bzs); @@ -701,7 +700,7 @@ BZ2Decompressor_dealloc(PyObject *op) if (self->lock != NULL) { PyThread_free_lock(self->lock); } - PyObject_GC_Del(self); + tp->tp_free(self); Py_DECREF(tp); }