Skip to content

Commit 6a1b24b

Browse files
committed
Clear weakrefs created by finalizers.
Weakrefs to unreachable garbage that are created during running of finalizers need to be cleared. This avoids exposing objects that have `tp_clear` called on them to Python-level code.
1 parent 0240ef4 commit 6a1b24b

File tree

4 files changed

+127
-10
lines changed

4 files changed

+127
-10
lines changed

Lib/test/test_gc.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,11 @@ class Cyclic(tuple):
262262
# finalizer.
263263
def __del__(self):
264264
265-
# 5. Create a weakref to `func` now. If we had created
266-
# it earlier, it would have been cleared by the
267-
# garbage collector before calling the finalizers.
265+
# 5. Create a weakref to `func` now. In previous
266+
# versions of Python, this would avoid having it
267+
# cleared by the garbage collector before calling
268+
# the finalizers. Now, weakrefs get cleared after
269+
# calling finalizers.
268270
self[1].ref = weakref.ref(self[0])
269271
270272
# 6. Drop the global reference to `latefin`. The only
@@ -293,14 +295,18 @@ def func():
293295
# which will find `cyc` and `func` as garbage.
294296
gc.collect()
295297
296-
# 9. Previously, this would crash because `func_qualname`
297-
# had been NULL-ed out by func_clear().
298+
# 9. Previously, this would crash because the weakref
299+
# created in the finalizer revealed the function after
300+
# `tp_clear` was called and `func_qualname`
301+
# had been NULL-ed out by func_clear(). Now, we clear
302+
# weakrefs to unreachable objects before calling `tp_clear`
303+
# but after calling finalizers.
298304
print(f"{func=}")
299305
"""
300-
# We're mostly just checking that this doesn't crash.
301306
rc, stdout, stderr = assert_python_ok("-c", code)
302307
self.assertEqual(rc, 0)
303-
self.assertRegex(stdout, rb"""\A\s*func=<function at \S+>\s*\z""")
308+
# The `func` global is None because the weakref was cleared.
309+
self.assertRegex(stdout, rb"""\A\s*func=None""")
304310
self.assertFalse(stderr)
305311

306312
@refcount_test
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
While performing garbage collection, clear weakrefs to unreachable objects
2+
that are created during running of finalizers. If those weakrefs were are
3+
not cleared, they could reveal unreachable objects.

Python/gc.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1037,6 +1037,62 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
10371037
return num_freed;
10381038
}
10391039

1040+
/* Clear all weakrefs to unreachable objects. When this returns, no object in
1041+
* `unreachable` is weakly referenced anymore.
1042+
*/
1043+
static void
1044+
clear_weakrefs(PyGC_Head *unreachable)
1045+
{
1046+
PyGC_Head *gc;
1047+
PyGC_Head *next;
1048+
1049+
for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) {
1050+
PyWeakReference **wrlist;
1051+
1052+
PyObject *op = FROM_GC(gc);
1053+
next = GC_NEXT(gc);
1054+
1055+
if (PyWeakref_Check(op)) {
1056+
/* A weakref inside the unreachable set must be cleared. If we
1057+
* allow its callback to execute inside delete_garbage(), it
1058+
* could expose objects that have tp_clear already called on
1059+
* them. Or, it could resurrect unreachable objects. One way
1060+
* this can happen is if some container objects do not implement
1061+
* tp_traverse. Then, wr_object can be outside the unreachable
1062+
* set but can be deallocated as a result of breaking the
1063+
* reference cycle. If we don't clear the weakref, the callback
1064+
* will run and potentially cause a crash. See bpo-38006 for
1065+
* one example.
1066+
*/
1067+
_PyWeakref_ClearRef((PyWeakReference *)op);
1068+
}
1069+
1070+
if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
1071+
continue;
1072+
}
1073+
1074+
/* It supports weakrefs. Does it have any?
1075+
*
1076+
* This is never triggered for static types so we can avoid the
1077+
* (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
1078+
*/
1079+
wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
1080+
1081+
/* `op` may have some weakrefs. March over the list, clear
1082+
* all the weakrefs.
1083+
*/
1084+
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
1085+
/* _PyWeakref_ClearRef clears the weakref but leaves
1086+
* the callback pointer intact. Obscure: it also
1087+
* changes *wrlist.
1088+
*/
1089+
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
1090+
_PyWeakref_ClearRef(wr);
1091+
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1092+
}
1093+
}
1094+
}
1095+
10401096
static void
10411097
debug_cycle(const char *msg, PyObject *op)
10421098
{
@@ -1751,6 +1807,14 @@ gc_collect_region(PyThreadState *tstate,
17511807
gc_list_init(&final_unreachable);
17521808
handle_resurrected_objects(&unreachable, &final_unreachable, to);
17531809

1810+
/* Clear weakrefs to objects in the unreachable set. No Python-level
1811+
* code must be allowed to access those unreachable objects. During
1812+
* delete_garbage(), finalizers outside the unreachable set might run
1813+
* and create new weakrefs. If those weakrefs were not cleared, they
1814+
* could reveal unreachable objects.
1815+
*/
1816+
clear_weakrefs(&final_unreachable);
1817+
17541818
/* Call tp_clear on objects in the final_unreachable set. This will cause
17551819
* the reference cycles to be broken. It may also cause some objects
17561820
* in finalizers to be freed.

Python/gc_free_threading.c

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1494,7 +1494,7 @@ move_legacy_finalizer_reachable(struct collection_state *state)
14941494
// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are
14951495
// enqueued in `wrcb_to_call`, but not invoked yet.
14961496
static void
1497-
clear_weakrefs(struct collection_state *state)
1497+
handle_weakrefs(struct collection_state *state)
14981498
{
14991499
PyObject *op;
15001500
WORKSTACK_FOR_EACH(&state->unreachable, op) {
@@ -1545,6 +1545,42 @@ clear_weakrefs(struct collection_state *state)
15451545
}
15461546
}
15471547

1548+
// Clear all weakrefs to unreachable objects.
1549+
static void
1550+
clear_weakrefs(struct collection_state *state)
1551+
{
1552+
PyObject *op;
1553+
WORKSTACK_FOR_EACH(&state->unreachable, op) {
1554+
if (PyWeakref_Check(op)) {
1555+
// Clear weakrefs that are themselves unreachable to ensure their
1556+
// callbacks will not be executed later from a `tp_clear()`
1557+
// inside delete_garbage(). That would be unsafe: it could
1558+
// resurrect a dead object or access a an already cleared object.
1559+
// See bpo-38006 for one example.
1560+
_PyWeakref_ClearRef((PyWeakReference *)op);
1561+
}
1562+
1563+
if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) {
1564+
continue;
1565+
}
1566+
1567+
// NOTE: This is never triggered for static types so we can avoid the
1568+
// (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR().
1569+
PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op);
1570+
1571+
// `op` may have some weakrefs. March over the list, clear
1572+
// all the weakrefs.
1573+
for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) {
1574+
// _PyWeakref_ClearRef clears the weakref but leaves
1575+
// the callback pointer intact. Obscure: it also
1576+
// changes *wrlist.
1577+
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == op);
1578+
_PyWeakref_ClearRef(wr);
1579+
_PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None);
1580+
}
1581+
}
1582+
}
1583+
15481584
static void
15491585
call_weakref_callbacks(struct collection_state *state)
15501586
{
@@ -2211,7 +2247,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
22112247
interp->gc.long_lived_total = state->long_lived_total;
22122248

22132249
// Clear weakrefs and enqueue callbacks (but do not call them).
2214-
clear_weakrefs(state);
2250+
handle_weakrefs(state);
22152251
_PyEval_StartTheWorld(interp);
22162252

22172253
// Deallocate any object from the refcount merge step
@@ -2222,11 +2258,19 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
22222258
call_weakref_callbacks(state);
22232259
finalize_garbage(state);
22242260

2225-
// Handle any objects that may have resurrected after the finalization.
22262261
_PyEval_StopTheWorld(interp);
2262+
// Handle any objects that may have resurrected after the finalization.
22272263
err = handle_resurrected_objects(state);
22282264
// Clear free lists in all threads
22292265
_PyGC_ClearAllFreeLists(interp);
2266+
if (err == 0) {
2267+
// Clear weakrefs to objects in the unreachable set. No Python-level
2268+
// code must be allowed to access those unreachable objects. During
2269+
// delete_garbage(), finalizers outside the unreachable set might
2270+
// run and create new weakrefs. If those weakrefs were not cleared,
2271+
// they could reveal unreachable objects.
2272+
clear_weakrefs(state);
2273+
}
22302274
_PyEval_StartTheWorld(interp);
22312275

22322276
if (err < 0) {

0 commit comments

Comments
 (0)