@@ -862,6 +862,40 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers)
862862 * outside the unreachable set -- indeed, those are precisely the weakrefs
863863 * whose callbacks must be invoked. See gc_weakref.txt for overview & some
864864 * details.
865+ *
866+ * The clearing of weakrefs is suble and must be done carefully, as there was
867+ * previous bugs related to this. First, weakrefs to the unreachable set of
868+ * objects must be cleared before we start calling `tp_clear`. If we don't,
869+ * those weakrefs can reveal unreachable objects to Python-level code and that
870+ * is not safe. Many objects are not usable after `tp_clear` has been called
871+ * and could even cause crashes if accessed (see bpo-38006 and gh-91636 as
872+ * example bugs).
873+ *
874+ * Weakrefs with callbacks always need to be cleared before executing the
875+ * callback. That's because sometimes a callback will call the ref object,
876+ * to check if the reference is actually dead (KeyedRef does this, for
877+ * example). We want to indicate that it is dead, even though it is possible
878+ * a finalizer might resurrect it. Clearing also prevents the callback from
879+ * executing more than once.
880+ *
881+ * Since Python 2.3, all weakrefs to cyclic garbage have been cleared *before*
882+ * calling finalizers. However, since tp_subclasses started being necessary
883+ * to invalidate caches (e.g. by PyType_Modified), that clearing has created
884+ * a bug. If the weakref to the subclass is cleared before a finalizer is
885+ * called, the cache may not be correctly invalidated. That can lead to
886+ * segfaults since the caches can refer to deallocated objects (GH-91636
887+ * is an example). Now, we delay the clear of weakrefs without callbacks
888+ * until *after* finalizers have been executed. That means weakrefs without
889+ * callbacks are still usable while finalizers are being executed.
890+ *
891+ * The weakrefs that are inside the unreachable set must also be cleared.
892+ * The reason this is required is not immediately obvious. If the weakref
893+ * refers to an object outside of the unreachable set, that object might go
894+ * away when we start clearing objects. Normally, the object should also be
895+ * part of the unreachable set but that's not true in the case of incomplete
896+ * or missing `tp_traverse` methods. When that object goes away, the callback
897+ * for weakref can be executed and that could reveal unreachable objects to
898+ * Python-level code. See bpo-38006 as an example bug.
865899 */
866900static int
867901handle_weakref_callbacks (PyGC_Head * unreachable , PyGC_Head * old )
@@ -904,36 +938,19 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
904938 // next pointer in the wrlist).
905939 next_wr = wr -> wr_next ;
906940
907- // Weakrefs with callbacks always need to be cleared before
908- // executing the callback. Sometimes the callback will call
909- // the ref object, to check if it's actually a dead reference
910- // (KeyedRef does this, for example). We want to indicate that it
911- // is dead, even though it is possible a finalizer might resurrect
912- // it. Clearing also prevents the callback from being executing
913- // more than once.
914- //
915- // Since Python 2.3, all weakrefs to cyclic garbage have
916- // been cleared *before* calling finalizers. However, since
917- // tp_subclasses started being necessary to invalidate caches
918- // (e.g. by PyType_Modified()), that clearing has created a bug.
919- // If the weakref to the subclass is cleared before a finalizer
920- // is called, the cache may not be correctly invalidated. That
921- // can lead to segfaults since the caches can refer to deallocated
922- // objects. Delaying the clear of weakrefs until *after*
923- // finalizers have been called fixes that bug.
924- if (wr -> wr_callback != NULL ) {
925- // _PyWeakref_ClearRef clears the weakref but leaves the
926- // callback pointer intact. Obscure: it also changes *wrlist.
927- _PyObject_ASSERT ((PyObject * )wr , wr -> wr_object == op );
928- _PyWeakref_ClearRef (wr );
929- _PyObject_ASSERT ((PyObject * )wr , wr -> wr_object == Py_None );
930- }
931-
932941 if (wr -> wr_callback == NULL ) {
933942 /* no callback */
934943 continue ;
935944 }
936945
946+ // Clear the weakref. See the comments above this function for
947+ // reasons why we need to clear weakrefs that have callbacks.
948+ // Note that _PyWeakref_ClearRef clears the weakref but leaves the
949+ // callback pointer intact. Obscure: it also changes *wrlist.
950+ _PyObject_ASSERT ((PyObject * )wr , wr -> wr_object == op );
951+ _PyWeakref_ClearRef (wr );
952+ _PyObject_ASSERT ((PyObject * )wr , wr -> wr_object == Py_None );
953+
937954 /* Headache time. `op` is going away, and is weakly referenced by
938955 * `wr`, which has a callback. Should the callback be invoked? If wr
939956 * is also trash, no:
@@ -1030,7 +1047,8 @@ handle_weakref_callbacks(PyGC_Head *unreachable, PyGC_Head *old)
10301047}
10311048
10321049/* Clear all weakrefs to unreachable objects. When this returns, no object in
1033- * `unreachable` is weakly referenced anymore.
1050+ * `unreachable` is weakly referenced anymore. See the comments above
1051+ * handle_weakref_callbacks() for why these weakrefs need to be cleared.
10341052 */
10351053static void
10361054clear_weakrefs (PyGC_Head * unreachable )
@@ -1045,16 +1063,9 @@ clear_weakrefs(PyGC_Head *unreachable)
10451063 next = GC_NEXT (gc );
10461064
10471065 if (PyWeakref_Check (op )) {
1048- /* A weakref inside the unreachable set must be cleared. If we
1049- * allow its callback to execute inside delete_garbage(), it
1050- * could expose objects that have tp_clear already called on
1051- * them. Or, it could resurrect unreachable objects. One way
1052- * this can happen is if some container objects do not implement
1053- * tp_traverse. Then, wr_object can be outside the unreachable
1054- * set but can be deallocated as a result of breaking the
1055- * reference cycle. If we don't clear the weakref, the callback
1056- * will run and potentially cause a crash. See bpo-38006 for
1057- * one example.
1066+ /* A weakref inside the unreachable set is always cleared. See
1067+ * the comments above handle_weakref_callbacks() for why these
1068+ * must be cleared.
10581069 */
10591070 _PyWeakref_ClearRef ((PyWeakReference * )op );
10601071 }
@@ -1790,19 +1801,8 @@ gc_collect_region(PyThreadState *tstate,
17901801 gc_list_init (& final_unreachable );
17911802 handle_resurrected_objects (& unreachable , & final_unreachable , to );
17921803
1793- /* Clear weakrefs to objects in the unreachable set. No Python-level
1794- * code must be allowed to access those unreachable objects. During
1795- * delete_garbage(), finalizers outside the unreachable set might run
1796- * and if those weakrefs were not cleared, that could reveal unreachable
1797- * objects.
1798- *
1799- * We used to clear weakrefs earlier, before calling finalizers. That
1800- * causes at least two problems. First, the finalizers could create
1801- * new weakrefs, that refer to unreachable objects. Those would not be
1802- * cleared and could cause the problem described above (see GH-91636 as
1803- * an example). Second, we need the weakrefs in the tp_subclasses to
1804- * *not* be cleared so that caches based on the type version are correctly
1805- * invalidated (see GH-135552 as a bug caused by this).
1804+ /* Clear weakrefs to objects in the unreachable set. See the comments
1805+ * above handle_weakref_callbacks() for details.
18061806 */
18071807 clear_weakrefs (& final_unreachable );
18081808
0 commit comments