Skip to content

Commit 3978e35

Browse files
committed
Re-work QSBR deferred advance and processing.
1 parent ce9232b commit 3978e35

File tree

3 files changed

+74
-50
lines changed

3 files changed

+74
-50
lines changed

Include/internal/pycore_qsbr.h

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,15 @@ struct _qsbr_thread_state {
4848
// Thread state (or NULL)
4949
PyThreadState *tstate;
5050

51-
// Used to defer advancing write sequence a fixed number of times
52-
int deferrals;
51+
// Number of held items added by this thread since last process
52+
int deferred_count;
5353

5454
// Estimate for the amount of memory that is held by this thread since
55-
// the last non-deferred advance.
56-
size_t memory_deferred;
55+
// the last process
56+
size_t deferred_memory;
57+
58+
// Sequence number at time of last "should process" check.
59+
uint64_t seq_last_check;
5760

5861
// Is this thread state allocated?
5962
bool allocated;
@@ -113,11 +116,20 @@ _Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
113116
extern uint64_t
114117
_Py_qsbr_advance(struct _qsbr_shared *shared);
115118

116-
// Batches requests to advance the write sequence. This advances the write
117-
// sequence every N calls, which reduces overhead but increases time to
118-
// reclamation. Returns the new goal.
119+
// Advance the write sequence as required and return the sequence goal to use
120+
// for memory to be freed. If the sequence is advanced, this goal is the new
121+
// sequence value, otherwise it is the next sequence value. In either case,
122+
// the goal is higher that any write sequence value already observed by readers.
123+
//
124+
// The 'size' argument is the size in bytes of the memory scheduled to be
125+
// freed. If that size is not available, pass zero as the value.
119126
extern uint64_t
120-
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr);
127+
_Py_qsbr_advance_with_size(struct _qsbr_thread_state *qsbr, size_t size);
128+
129+
// Return true if memory held by QSBR should be processed to determine if it
130+
// can be safely freed.
131+
extern bool
132+
_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr);
121133

122134
// Have the read sequences advanced to the given goal? If this returns true,
123135
// it safe to reclaim any memory tagged with the goal (or earlier goal).

Objects/obmalloc.c

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,10 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
139139

140140
_PyMem_mi_page_clear_qsbr(page);
141141
page->retire_expire = 0;
142-
page->qsbr_goal = _Py_qsbr_deferred_advance(tstate->qsbr);
142+
143+
size_t bsize = mi_page_block_size(page);
144+
page->qsbr_goal = _Py_qsbr_advance_with_size(tstate->qsbr, page->capacity*bsize);
145+
143146
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
144147
return false;
145148
}
@@ -1141,27 +1144,6 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state)
11411144
}
11421145
}
11431146

1144-
#ifdef Py_GIL_DISABLED
1145-
static int
1146-
should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size)
1147-
{
1148-
// If the deferred memory exceeds 1 MiB, we force an advance in the
1149-
// shared QSBR sequence number to limit excess memory usage.
1150-
static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024;
1151-
if (size > QSBR_DEFERRED_LIMIT) {
1152-
tstate->qsbr->memory_deferred = 0;
1153-
return 1;
1154-
}
1155-
1156-
tstate->qsbr->memory_deferred += size;
1157-
if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) {
1158-
tstate->qsbr->memory_deferred = 0;
1159-
return 1;
1160-
}
1161-
return 0;
1162-
}
1163-
#endif
1164-
11651147
static void
11661148
free_delayed(uintptr_t ptr, size_t size)
11671149
{
@@ -1221,20 +1203,12 @@ free_delayed(uintptr_t ptr, size_t size)
12211203
}
12221204

12231205
assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1224-
uint64_t seq;
1225-
int force_advance = should_advance_qsbr(tstate, size);
1226-
if (force_advance) {
1227-
seq = _Py_qsbr_advance(tstate->qsbr->shared);
1228-
}
1229-
else {
1230-
seq = _Py_qsbr_deferred_advance(tstate->qsbr);
1231-
}
1206+
uint64_t seq = _Py_qsbr_advance_with_size(tstate->qsbr, size);
12321207
buf->array[buf->wr_idx].ptr = ptr;
12331208
buf->array[buf->wr_idx].qsbr_goal = seq;
12341209
buf->wr_idx++;
1235-
if (buf->wr_idx == WORK_ITEMS_PER_CHUNK || force_advance) {
1236-
_PyMem_ProcessDelayed((PyThreadState *)tstate);
1237-
}
1210+
1211+
_PyMem_ProcessDelayed((PyThreadState *)tstate);
12381212
#endif
12391213
}
12401214

@@ -1326,8 +1300,11 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
13261300
void
13271301
_PyMem_ProcessDelayed(PyThreadState *tstate)
13281302
{
1329-
PyInterpreterState *interp = tstate->interp;
13301303
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1304+
if (!_Py_qsbr_should_process(tstate_impl->qsbr)) {
1305+
return;
1306+
}
1307+
PyInterpreterState *interp = tstate->interp;
13311308

13321309
// Process thread-local work
13331310
process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);

Python/qsbr.c

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@
4141
// Starting size of the array of qsbr thread states
4242
#define MIN_ARRAY_SIZE 8
4343

44-
// For _Py_qsbr_deferred_advance(): the number of deferrals before advancing
45-
// the write sequence.
46-
#define QSBR_DEFERRED_LIMIT 10
44+
// For should_advance_qsbr(): the number of deferred items before advancing
45+
// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally
46+
// want to process a chunk before it overflows.
47+
#define QSBR_DEFERRED_LIMIT 127
48+
49+
// If the deferred memory exceeds 1 MiB, we force an advance in the
50+
// shared QSBR sequence number to limit excess memory usage.
51+
#define QSBR_MEM_LIMIT 1024*1024
4752

4853
// Allocate a QSBR thread state from the freelist
4954
static struct _qsbr_thread_state *
@@ -113,17 +118,47 @@ _Py_qsbr_advance(struct _qsbr_shared *shared)
113118
// NOTE: with 64-bit sequence numbers, we don't have to worry too much
114119
// about the wr_seq getting too far ahead of rd_seq, but if we ever use
115120
// 32-bit sequence numbers, we'll need to be more careful.
116-
return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR) + QSBR_INCR;
121+
return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR);
122+
}
123+
124+
static int
125+
should_advance_qsbr(struct _qsbr_thread_state *qsbr, size_t size)
126+
{
127+
qsbr->deferred_count++;
128+
qsbr->deferred_memory += size;
129+
if (qsbr->deferred_count >= QSBR_DEFERRED_LIMIT ||
130+
qsbr->deferred_memory > QSBR_MEM_LIMIT) {
131+
qsbr->deferred_count = 0;
132+
qsbr->deferred_memory = 0;
133+
return 1;
134+
}
135+
return 0;
117136
}
118137

119138
uint64_t
120-
_Py_qsbr_deferred_advance(struct _qsbr_thread_state *qsbr)
139+
_Py_qsbr_advance_with_size(struct _qsbr_thread_state *qsbr, size_t size)
121140
{
122-
if (++qsbr->deferrals < QSBR_DEFERRED_LIMIT) {
141+
if (should_advance_qsbr(qsbr, size)) {
142+
// Advance the write sequence and return the updated value as the goal.
143+
return _Py_qsbr_advance(qsbr->shared);
144+
}
145+
else {
146+
// Don't advance, return the next sequence value as the goal.
123147
return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
124148
}
125-
qsbr->deferrals = 0;
126-
return _Py_qsbr_advance(qsbr->shared);
149+
}
150+
151+
bool
152+
_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
153+
{
154+
if (qsbr->seq_last_check == qsbr->seq) {
155+
// If 'seq' for this thread hasn't advanced, it is unlikely that any
156+
// deferred memory is ready to be freed. Wait longer before trying
157+
// to process.
158+
return false;
159+
}
160+
qsbr->seq_last_check = qsbr->seq;
161+
return true;
127162
}
128163

129164
static uint64_t

0 commit comments

Comments
 (0)