diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 2124e76514f1af..988f828d5a7750 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -165,6 +165,7 @@ typedef struct {
     // Lowest two bits are used for flags documented later.
     // Those bits are made available by the struct's minimum alignment.
     uintptr_t _gc_prev;
+    uintptr_t _visited;
 } PyGC_Head;
 
 #define _PyGC_Head_UNUSED PyGC_Head
@@ -181,6 +182,8 @@ struct gc_collection_stats {
     Py_ssize_t collected;
     /* total number of uncollectable objects (put into gc.garbage) */
     Py_ssize_t uncollectable;
+    Py_ssize_t tracked_tuples;
+    Py_ssize_t untracked_tuples;
 };
 
 /* Running stats per generation */
@@ -191,6 +194,12 @@ struct gc_generation_stats {
     Py_ssize_t collected;
     /* total number of uncollectable objects (put into gc.garbage) */
     Py_ssize_t uncollectable;
+    Py_ssize_t tracked_tuples;
+    Py_ssize_t total_tracked_tuples;
+    Py_ssize_t untracked_tuples;
+    Py_ssize_t total_untracked_tuples;
+    Py_ssize_t total_tuples;
+    Py_ssize_t tuples_by_size[33];
 };
 
 enum _GCPhase {
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 77560e5da66b03..2cbb7a32bb5de4 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -937,8 +937,8 @@ extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type,
 # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1)
 # define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-2)
 #else
-# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-3)
-# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4)
+# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4)
+# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-5)
 #endif
 
 typedef union {
diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h
index be1961cbf77a2d..e2dc13ba17903d 100644
--- a/Include/internal/pycore_tuple.h
+++ b/Include/internal/pycore_tuple.h
@@ -11,7 +11,7 @@ extern "C" {
 #include "pycore_object.h"   // _PyObject_GC_IS_TRACKED
 #include "pycore_structs.h"  // _PyStackRef
 
-extern void _PyTuple_MaybeUntrack(PyObject *);
+extern int _PyTuple_MaybeUntrack(PyObject *);
 extern void _PyTuple_DebugMallocStats(FILE *out);
 
 /* runtime lifecycle */
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 1fa4bae638a1fe..9d158a66b67813 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -22,6 +22,44 @@ class tuple "PyTupleObject *" "&PyTuple_Type"
 
 static inline int maybe_freelist_push(PyTupleObject *);
 
+static uint8_t
+_log2_int(Py_ssize_t size)
+{
+    if (size == 0) {
+        return 0;
+    }
+
+    const uint64_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000, 0xFFFFFFFF00000000};
+    const uint64_t S[] = {1, 2, 4, 8, 16, 32};
+
+    int64_t v = size;
+    uint8_t r = 0;               // result of log2(v) will go here
+    for (int i = 5; i >= 0; i--) // unroll for speed...
+    {
+        if (v & b[i])
+        {
+            v >>= S[i];
+            r |= S[i];
+        }
+    }
+
+#ifdef Py_DEBUG
+    uint8_t x = (uint8_t)log2((double)size);
+    assert(x == r);
+#endif
+
+    return r + 1;
+}
+
+static void
+_count_tuple(Py_ssize_t size)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    interp->gc.generation_stats[0].total_tuples += 1;
+    uint8_t size_index = _log2_int(size);
+    interp->gc.generation_stats[0].tuples_by_size[size_index] += 1;
+}
+
 /* Allocate an uninitialized tuple object.
    Before making it public, following steps must be done:
@@ -46,6 +84,7 @@ tuple_alloc(Py_ssize_t size)
         PyTupleObject *op = _Py_FREELIST_POP(PyTupleObject, tuples[index]);
         if (op != NULL) {
             _PyTuple_RESET_HASH_CACHE(op);
+            _count_tuple(size);
             return op;
         }
     }
@@ -57,6 +96,7 @@ tuple_alloc(Py_ssize_t size)
     PyTupleObject *result = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
     if (result != NULL) {
         _PyTuple_RESET_HASH_CACHE(result);
+        _count_tuple(size);
     }
     return result;
 }
@@ -68,6 +108,7 @@ tuple_alloc(Py_ssize_t size)
 static inline PyObject *
 tuple_get_empty(void)
 {
+    _count_tuple(0);
     return (PyObject *)&_Py_SINGLETON(tuple_empty);
 }
 
@@ -134,14 +175,14 @@ PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
     return 0;
 }
 
-void
+int
 _PyTuple_MaybeUntrack(PyObject *op)
 {
     PyTupleObject *t;
     Py_ssize_t i, n;
 
     if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
-        return;
+        return 0;
     t = (PyTupleObject *) op;
     n = Py_SIZE(t);
     for (i = 0; i < n; i++) {
@@ -151,9 +192,10 @@ _PyTuple_MaybeUntrack(PyObject *op)
            them yet. */
         if (!elt ||
             _PyObject_GC_MAY_BE_TRACKED(elt))
-            return;
+            return 0;
     }
     _PyObject_GC_UNTRACK(op);
+    return 1;
 }
 
 PyObject *
@@ -175,12 +217,18 @@ PyTuple_Pack(Py_ssize_t n, ...)
         return NULL;
     }
     items = result->ob_item;
+    bool track = false;
     for (i = 0; i < n; i++) {
         o = va_arg(vargs, PyObject *);
         items[i] = Py_NewRef(o);
+        if (!track && _PyObject_GC_MAY_BE_TRACKED(items[i])) {
+            track = true;
+        }
     }
     va_end(vargs);
-    _PyObject_GC_TRACK(result);
+    if (track) {
+        _PyObject_GC_TRACK(result);
+    }
     return (PyObject *)result;
 }
 
@@ -376,12 +424,18 @@ PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
     if (tuple == NULL) {
         return NULL;
     }
+    bool track = false;
     PyObject **dst = tuple->ob_item;
     for (Py_ssize_t i = 0; i < n; i++) {
         PyObject *item = src[i];
         dst[i] = Py_NewRef(item);
+        if (!track && _PyObject_GC_MAY_BE_TRACKED(dst[i])) {
+            track = true;
+        }
+    }
+    if (track) {
+        _PyObject_GC_TRACK(tuple);
     }
-    _PyObject_GC_TRACK(tuple);
     return (PyObject *)tuple;
 }
 
@@ -395,11 +449,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef *src, Py_ssize_t n)
     if (tuple == NULL) {
         return NULL;
     }
+    bool track = false;
     PyObject **dst = tuple->ob_item;
     for (Py_ssize_t i = 0; i < n; i++) {
         dst[i] = PyStackRef_AsPyObjectSteal(src[i]);
+        if (!track && _PyObject_GC_MAY_BE_TRACKED(dst[i])) {
+            track = true;
+        }
+    }
+    if (track) {
+        _PyObject_GC_TRACK(tuple);
     }
-    _PyObject_GC_TRACK(tuple);
     return (PyObject *)tuple;
 }
 
@@ -416,12 +476,18 @@ _PyTuple_FromArraySteal(PyObject *const *src, Py_ssize_t n)
         }
         return NULL;
     }
+    bool track = false;
     PyObject **dst = tuple->ob_item;
     for (Py_ssize_t i = 0; i < n; i++) {
         PyObject *item = src[i];
         dst[i] = item;
+        if (!track && _PyObject_GC_MAY_BE_TRACKED(item)) {
+            track = true;
+        }
+    }
+    if (track) {
+        _PyObject_GC_TRACK(tuple);
     }
-    _PyObject_GC_TRACK(tuple);
     return (PyObject *)tuple;
 }
 
@@ -494,7 +560,9 @@ tuple_concat(PyObject *aa, PyObject *bb)
         dest[i] = Py_NewRef(v);
     }
-    _PyObject_GC_TRACK(np);
+    if (_PyObject_GC_IS_TRACKED(a) || _PyObject_GC_IS_TRACKED(b)) {
+        _PyObject_GC_TRACK(np);
+    }
     return (PyObject *)np;
 }
 
@@ -543,7 +611,9 @@ tuple_repeat(PyObject *self, Py_ssize_t n)
         _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
                           sizeof(PyObject *)*input_size);
     }
-    _PyObject_GC_TRACK(np);
+    if (_PyObject_GC_IS_TRACKED(a)) {
+        _PyObject_GC_TRACK(np);
+    }
     return (PyObject *) np;
 }
 
@@ -821,7 +891,9 @@ tuple_subscript(PyObject *op, PyObject* item)
             dest[i] = it;
         }
 
-        _PyObject_GC_TRACK(result);
+        if (_PyObject_GC_IS_TRACKED(self)) {
+            _PyObject_GC_TRACK(result);
+        }
         return (PyObject *)result;
     }
 }
diff --git a/Python/gc.c b/Python/gc.c
index 79c7476f4a9a74..27a36a94c200c1 100644
--- a/Python/gc.c
+++ b/Python/gc.c
@@ -70,6 +70,18 @@ gc_clear_collecting(PyGC_Head *g)
     g->_gc_prev &= ~PREV_MASK_COLLECTING;
 }
 
+static inline int
+gc_is_visited(PyGC_Head *g)
+{
+    return (int)g->_visited;
+}
+
+static inline void
+gc_set_is_visited(PyGC_Head *g)
+{
+    g->_visited = 1;
+}
+
 static inline Py_ssize_t
 gc_get_refs(PyGC_Head *g)
 {
@@ -753,18 +765,39 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
  * and is much faster than a more complex approach that
  * would untrack all relevant tuples.
  */
-static void
+static Py_ssize_t
 untrack_tuples(PyGC_Head *head)
 {
+    Py_ssize_t untracked = 0;
     PyGC_Head *gc = GC_NEXT(head);
     while (gc != head) {
         PyObject *op = FROM_GC(gc);
         PyGC_Head *next = GC_NEXT(gc);
         if (PyTuple_CheckExact(op)) {
-            _PyTuple_MaybeUntrack(op);
+            untracked += _PyTuple_MaybeUntrack(op);
+        }
+        gc = next;
+    }
+    return untracked;
+}
+
+static Py_ssize_t
+count_tuples(PyGC_Head *head)
+{
+    Py_ssize_t tuples = 0;
+    PyGC_Head *gc = GC_NEXT(head);
+    while (gc != head) {
+        PyObject *op = FROM_GC(gc);
+        PyGC_Head *next = GC_NEXT(gc);
+        if (!gc_is_visited(gc)) {
+            if (PyTuple_CheckExact(op)) {
+                tuples += 1;
+            }
+            gc_set_is_visited(gc);
         }
         gc = next;
     }
+    return tuples;
 }
 
 /* Return true if object has a pre-PEP 442 finalization method. */
@@ -1376,7 +1409,8 @@ gc_collect_young(PyThreadState *tstate,
     validate_spaces(gcstate);
     PyGC_Head *young = &gcstate->young.head;
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
-    untrack_tuples(young);
+    stats->tracked_tuples += count_tuples(young);
+    stats->untracked_tuples += untrack_tuples(young);
     GC_STAT_ADD(0, collections, 1);
 
     PyGC_Head survivors;
@@ -1654,7 +1688,8 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
     GC_STAT_ADD(1, collections, 1);
     GCState *gcstate = &tstate->interp->gc;
     gcstate->work_to_do += assess_work_to_do(gcstate);
-    untrack_tuples(&gcstate->young.head);
+    stats->tracked_tuples += count_tuples(&gcstate->young.head);
+    stats->untracked_tuples += untrack_tuples(&gcstate->young.head);
     if (gcstate->phase == GC_PHASE_MARK) {
         Py_ssize_t objects_marked = mark_at_start(tstate);
         GC_STAT_ADD(1, objects_transitively_reachable, objects_marked);
@@ -1716,7 +1751,8 @@ gc_collect_full(PyThreadState *tstate,
     PyGC_Head *young = &gcstate->young.head;
     PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head;
     PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head;
-    untrack_tuples(young);
+    stats->tracked_tuples += count_tuples(young);
+    stats->untracked_tuples += untrack_tuples(young);
     /* merge all generations into visited */
     gc_list_merge(young, pending);
     gc_list_validate_space(pending, 1-gcstate->visited_space);
@@ -1756,7 +1792,8 @@ gc_collect_region(PyThreadState *tstate,
     gc_list_init(&unreachable);
     deduce_unreachable(from, &unreachable);
    validate_consistent_old_space(from);
-    untrack_tuples(from);
+    stats->tracked_tuples += count_tuples(from);
+    stats->untracked_tuples += untrack_tuples(from);
     /* Move reachable objects to next generation. */
     validate_consistent_old_space(to);
@@ -2098,12 +2135,33 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
     default:
         Py_UNREACHABLE();
     }
+    gcstate->generation_stats[0].total_tracked_tuples += stats.tracked_tuples;
+    gcstate->generation_stats[0].total_untracked_tuples += stats.untracked_tuples;
+    gcstate->generation_stats[generation].tracked_tuples += stats.tracked_tuples;
+    gcstate->generation_stats[generation].untracked_tuples += stats.untracked_tuples;
     if (PyDTrace_GC_DONE_ENABLED()) {
         PyDTrace_GC_DONE(stats.uncollectable + stats.collected);
     }
     if (reason != _Py_GC_REASON_SHUTDOWN) {
         invoke_gc_callback(gcstate, "stop", generation, &stats);
     }
+    else {
+        if (true) {
+            FILE *out = stderr;
+
+            fprintf(out, "GC[%d] total tuples : %zd\n", 0, gcstate->generation_stats[0].total_tuples);
+            fprintf(out, "GC[%d] total tracked_tuples : %zd\n", 0, gcstate->generation_stats[0].total_tracked_tuples);
+            fprintf(out, "GC[%d] total untracked_tuples : %zd\n", 0, gcstate->generation_stats[0].total_untracked_tuples);
+            for (int i = 0; i < 33; i++) {
+                fprintf(out, "GC[%d] by size %d : %zd\n", 0, i, gcstate->generation_stats[0].tuples_by_size[i]);
+            }
+
+            for (int i = 0; i < NUM_GENERATIONS; i++) {
+                fprintf(out, "GC[%d] tracked_tuples : %zd\n", i, gcstate->generation_stats[i].tracked_tuples);
+                fprintf(out, "GC[%d] untracked_tuples: %zd\n", i, gcstate->generation_stats[i].untracked_tuples);
+            }
+        }
+    }
     _PyErr_SetRaisedException(tstate, exc);
     GC_STAT_ADD(generation, objects_collected, stats.collected);
 #ifdef Py_STATS
@@ -2296,6 +2354,7 @@ _PyObject_GC_Link(PyObject *op)
     GCState *gcstate = &tstate->interp->gc;
     gc->_gc_next = 0;
     gc->_gc_prev = 0;
+    gc->_visited = 0;
     gcstate->young.count++; /* number of allocated GC objects */
     gcstate->heap_size++;
     if (gcstate->young.count > gcstate->young.threshold &&
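
Reviewer note: below is a minimal standalone sketch (not part of the patch) of the size-to-bucket
mapping that _log2_int() and _count_tuple() implement: bucket 0 is reserved for the empty tuple and
every other size lands in bucket floor(log2(size)) + 1, so the 33-slot tuples_by_size array covers
sizes up to 2**32 - 1. The helper name bucket_for_size and the main() harness are illustrative
assumptions only; the sketch builds with any C99 compiler and needs no CPython headers.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Standalone re-implementation of the bit-twiddling bucket mapping used by
 * _log2_int()/_count_tuple() in the patch above (sketch only). */
static uint8_t
bucket_for_size(int64_t size)
{
    if (size == 0) {
        return 0;                  /* bucket 0: the empty tuple */
    }
    const uint64_t b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000, 0xFFFFFFFF00000000};
    const uint64_t S[] = {1, 2, 4, 8, 16, 32};
    int64_t v = size;
    uint8_t r = 0;                 /* accumulates floor(log2(v)) */
    for (int i = 5; i >= 0; i--) {
        if (v & b[i]) {
            v >>= S[i];
            r |= S[i];
        }
    }
    return r + 1;                  /* +1 so that size == 1 maps to bucket 1 */
}

int
main(void)
{
    /* Buckets: 0 -> empty, 1 -> size 1, 2 -> sizes 2..3, 3 -> sizes 4..7, ... */
    const int64_t sizes[]    = {0, 1, 2, 3, 4, 7, 8, 1000, 1 << 20};
    const uint8_t expected[] = {0, 1, 2, 2, 3, 3, 4, 10, 21};
    for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        uint8_t got = bucket_for_size(sizes[i]);
        printf("size %lld -> bucket %u\n", (long long)sizes[i], (unsigned)got);
        assert(got == expected[i]);
    }
    return 0;
}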