Skip to content

Commit d1a434f

Browse files
authored
[3.14] GH-139951: Fix major GC performance regression. Backport of GH-140262 (GH-140447)
* Count number of actually tracked objects, instead of trackable objects. This ensures that untracking tuples has the desired effect of reducing GC overhead * Do not track most untrackable tuples during creation. This prevents large numbers of small tuples from causing excessive GCs.
1 parent 0fdae5f commit d1a434f

File tree

5 files changed

+94
-36
lines changed

5 files changed

+94
-36
lines changed

Include/internal/pycore_gc.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) {
205205
#endif
206206
}
207207

208+
extern void _Py_ScheduleGC(PyThreadState *tstate);
209+
210+
#ifndef Py_GIL_DISABLED
211+
extern void _Py_TriggerGC(struct _gc_runtime_state *gcstate);
212+
#endif
213+
208214

209215
/* Tell the GC to track this object.
210216
*
@@ -238,14 +244,19 @@ static inline void _PyObject_GC_TRACK(
238244
"object is in generation which is garbage collected",
239245
filename, lineno, __func__);
240246

241-
PyInterpreterState *interp = _PyInterpreterState_GET();
242-
PyGC_Head *generation0 = &interp->gc.young.head;
247+
struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
248+
PyGC_Head *generation0 = &gcstate->young.head;
243249
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
244250
_PyGCHead_SET_NEXT(last, gc);
245251
_PyGCHead_SET_PREV(gc, last);
246-
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
252+
uintptr_t not_visited = 1 ^ gcstate->visited_space;
247253
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
248254
generation0->_gc_prev = (uintptr_t)gc;
255+
gcstate->young.count++; /* number of tracked GC objects */
256+
gcstate->heap_size++;
257+
if (gcstate->young.count > gcstate->young.threshold) {
258+
_Py_TriggerGC(gcstate);
259+
}
249260
#endif
250261
}
251262

@@ -280,6 +291,11 @@ static inline void _PyObject_GC_UNTRACK(
280291
_PyGCHead_SET_PREV(next, prev);
281292
gc->_gc_next = 0;
282293
gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
294+
struct _gc_runtime_state *gcstate = &_PyInterpreterState_GET()->gc;
295+
if (gcstate->young.count > 0) {
296+
gcstate->young.count--;
297+
}
298+
gcstate->heap_size--;
283299
#endif
284300
}
285301

@@ -343,7 +359,6 @@ extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs);
343359

344360
// Functions to clear types free lists
345361
extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
346-
extern void _Py_ScheduleGC(PyThreadState *tstate);
347362
extern void _Py_RunGC(PyThreadState *tstate);
348363

349364
union _PyStackRef;

Lib/test/test_gc.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,7 @@ def setUp(self):
13291329
def tearDown(self):
13301330
gc.disable()
13311331

1332+
@unittest.skipIf(Py_GIL_DISABLED, "requires GC generations or increments")
13321333
def test_bug1055820c(self):
13331334
# Corresponds to temp2c.py in the bug report. This is pretty
13341335
# elaborate.
@@ -1390,10 +1391,11 @@ def callback(ignored):
13901391
# The free-threaded build doesn't have multiple generations, so
13911392
# just trigger a GC manually.
13921393
gc.collect()
1394+
assert not detector.gc_happened
13931395
while not detector.gc_happened:
13941396
i += 1
1395-
if i > 10000:
1396-
self.fail("gc didn't happen after 10000 iterations")
1397+
if i > 100000:
1398+
self.fail("gc didn't happen after 100000 iterations")
13971399
self.assertEqual(len(ouch), 0)
13981400
junk.append([]) # this will eventually trigger gc
13991401

@@ -1464,8 +1466,8 @@ def __del__(self):
14641466
gc.collect()
14651467
while not detector.gc_happened:
14661468
i += 1
1467-
if i > 10000:
1468-
self.fail("gc didn't happen after 10000 iterations")
1469+
if i > 50000:
1470+
self.fail("gc didn't happen after 50000 iterations")
14691471
self.assertEqual(len(ouch), 0)
14701472
junk.append([]) # this will eventually trigger gc
14711473

@@ -1482,8 +1484,8 @@ def test_indirect_calls_with_gc_disabled(self):
14821484
detector = GC_Detector()
14831485
while not detector.gc_happened:
14841486
i += 1
1485-
if i > 10000:
1486-
self.fail("gc didn't happen after 10000 iterations")
1487+
if i > 100000:
1488+
self.fail("gc didn't happen after 100000 iterations")
14871489
junk.append([]) # this will eventually trigger gc
14881490

14891491
try:
@@ -1493,11 +1495,11 @@ def test_indirect_calls_with_gc_disabled(self):
14931495
detector = GC_Detector()
14941496
while not detector.gc_happened:
14951497
i += 1
1496-
if i > 10000:
1498+
if i > 100000:
14971499
break
14981500
junk.append([]) # this may eventually trigger gc (if it is enabled)
14991501

1500-
self.assertEqual(i, 10001)
1502+
self.assertEqual(i, 100001)
15011503
finally:
15021504
gc.enable()
15031505

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fixes a regression in GC performance for a growing heap composed mostly of
2+
small tuples.
3+
4+
* Counts number of actually tracked objects, instead of trackable objects.
5+
This ensures that untracking tuples has the desired effect of reducing GC overhead.
6+
* Does not track most untrackable tuples during creation.
7+
This prevents large numbers of small tuples causing excessive GCs.

Objects/tupleobject.c

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,26 @@ _PyTuple_MaybeUntrack(PyObject *op)
156156
_PyObject_GC_UNTRACK(op);
157157
}
158158

159+
/* Fast, but conservative check if an object maybe tracked
160+
May return true for an object that is not tracked,
161+
Will always return true for an object that is tracked.
162+
This is a temporary workaround until _PyObject_GC_IS_TRACKED
163+
becomes fast and safe to call on non-GC objects.
164+
*/
165+
static bool
166+
maybe_tracked(PyObject *ob)
167+
{
168+
return _PyType_IS_GC(Py_TYPE(ob));
169+
}
170+
159171
PyObject *
160172
PyTuple_Pack(Py_ssize_t n, ...)
161173
{
162174
Py_ssize_t i;
163175
PyObject *o;
164176
PyObject **items;
165177
va_list vargs;
178+
bool track = false;
166179

167180
if (n == 0) {
168181
return tuple_get_empty();
@@ -177,10 +190,15 @@ PyTuple_Pack(Py_ssize_t n, ...)
177190
items = result->ob_item;
178191
for (i = 0; i < n; i++) {
179192
o = va_arg(vargs, PyObject *);
193+
if (!track && maybe_tracked(o)) {
194+
track = true;
195+
}
180196
items[i] = Py_NewRef(o);
181197
}
182198
va_end(vargs);
183-
_PyObject_GC_TRACK(result);
199+
if (track) {
200+
_PyObject_GC_TRACK(result);
201+
}
184202
return (PyObject *)result;
185203
}
186204

@@ -377,11 +395,17 @@ _PyTuple_FromArray(PyObject *const *src, Py_ssize_t n)
377395
return NULL;
378396
}
379397
PyObject **dst = tuple->ob_item;
398+
bool track = false;
380399
for (Py_ssize_t i = 0; i < n; i++) {
381400
PyObject *item = src[i];
401+
if (!track && maybe_tracked(item)) {
402+
track = true;
403+
}
382404
dst[i] = Py_NewRef(item);
383405
}
384-
_PyObject_GC_TRACK(tuple);
406+
if (track) {
407+
_PyObject_GC_TRACK(tuple);
408+
}
385409
return (PyObject *)tuple;
386410
}
387411

@@ -396,10 +420,17 @@ _PyTuple_FromStackRefStealOnSuccess(const _PyStackRef *src, Py_ssize_t n)
396420
return NULL;
397421
}
398422
PyObject **dst = tuple->ob_item;
423+
bool track = false;
399424
for (Py_ssize_t i = 0; i < n; i++) {
400-
dst[i] = PyStackRef_AsPyObjectSteal(src[i]);
425+
PyObject *item = PyStackRef_AsPyObjectSteal(src[i]);
426+
if (!track && maybe_tracked(item)) {
427+
track = true;
428+
}
429+
dst[i] = item;
430+
}
431+
if (track) {
432+
_PyObject_GC_TRACK(tuple);
401433
}
402-
_PyObject_GC_TRACK(tuple);
403434
return (PyObject *)tuple;
404435
}
405436

Python/gc.c

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,7 @@ assess_work_to_do(GCState *gcstate)
15901590
scale_factor = 2;
15911591
}
15921592
intptr_t new_objects = gcstate->young.count;
1593-
intptr_t max_heap_fraction = new_objects*3/2;
1593+
intptr_t max_heap_fraction = new_objects*2;
15941594
intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
15951595
if (heap_fraction > max_heap_fraction) {
15961596
heap_fraction = max_heap_fraction;
@@ -1605,6 +1605,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
16051605
GC_STAT_ADD(1, collections, 1);
16061606
GCState *gcstate = &tstate->interp->gc;
16071607
gcstate->work_to_do += assess_work_to_do(gcstate);
1608+
if (gcstate->work_to_do < 0) {
1609+
return;
1610+
}
16081611
untrack_tuples(&gcstate->young.head);
16091612
if (gcstate->phase == GC_PHASE_MARK) {
16101613
Py_ssize_t objects_marked = mark_at_start(tstate);
@@ -1647,7 +1650,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
16471650
gc_collect_region(tstate, &increment, &survivors, stats);
16481651
gc_list_merge(&survivors, visited);
16491652
assert(gc_list_is_empty(&increment));
1650-
gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor;
16511653
gcstate->work_to_do -= increment_size;
16521654

16531655
add_stats(gcstate, 1, stats);
@@ -2231,28 +2233,29 @@ _Py_ScheduleGC(PyThreadState *tstate)
22312233
}
22322234

22332235
void
2234-
_PyObject_GC_Link(PyObject *op)
2236+
_Py_TriggerGC(struct _gc_runtime_state *gcstate)
22352237
{
2236-
PyGC_Head *gc = AS_GC(op);
2237-
// gc must be correctly aligned
2238-
_PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
2239-
22402238
PyThreadState *tstate = _PyThreadState_GET();
2241-
GCState *gcstate = &tstate->interp->gc;
2242-
gc->_gc_next = 0;
2243-
gc->_gc_prev = 0;
2244-
gcstate->young.count++; /* number of allocated GC objects */
2245-
gcstate->heap_size++;
2246-
if (gcstate->young.count > gcstate->young.threshold &&
2247-
gcstate->enabled &&
2248-
gcstate->young.threshold &&
2239+
if (gcstate->enabled &&
2240+
gcstate->young.threshold != 0 &&
22492241
!_Py_atomic_load_int_relaxed(&gcstate->collecting) &&
22502242
!_PyErr_Occurred(tstate))
22512243
{
22522244
_Py_ScheduleGC(tstate);
22532245
}
22542246
}
22552247

2248+
void
2249+
_PyObject_GC_Link(PyObject *op)
2250+
{
2251+
PyGC_Head *gc = AS_GC(op);
2252+
// gc must be correctly aligned
2253+
_PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0);
2254+
gc->_gc_next = 0;
2255+
gc->_gc_prev = 0;
2256+
2257+
}
2258+
22562259
void
22572260
_Py_RunGC(PyThreadState *tstate)
22582261
{
@@ -2359,6 +2362,11 @@ PyObject_GC_Del(void *op)
23592362
PyGC_Head *g = AS_GC(op);
23602363
if (_PyObject_GC_IS_TRACKED(op)) {
23612364
gc_list_remove(g);
2365+
GCState *gcstate = get_gc_state();
2366+
if (gcstate->young.count > 0) {
2367+
gcstate->young.count--;
2368+
}
2369+
gcstate->heap_size--;
23622370
#ifdef Py_DEBUG
23632371
PyObject *exc = PyErr_GetRaisedException();
23642372
if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
@@ -2372,11 +2380,6 @@ PyObject_GC_Del(void *op)
23722380
PyErr_SetRaisedException(exc);
23732381
#endif
23742382
}
2375-
GCState *gcstate = get_gc_state();
2376-
if (gcstate->young.count > 0) {
2377-
gcstate->young.count--;
2378-
}
2379-
gcstate->heap_size--;
23802383
PyObject_Free(((char *)op)-presize);
23812384
}
23822385

0 commit comments

Comments
 (0)