Skip to content

Commit 7ea28ff

Browse files
committed
Revise based on review feedback.
* Keep separate count of mimalloc page memory that is deferred from collection. This memory doesn't get freed by _PyMem_ProcessDelayed(). We want to advance the write sequence if there is too much of it but calling _PyMem_ProcessDelayed() is not helpful. * Use `process_seq` variable to schedule the next call to `_PyMem_ProcessDelayed()`. * Rename advance functions to have "deferred" in name. * Move `_Py_qsbr_should_process()` call up one level.
1 parent 0b276ab commit 7ea28ff

File tree

3 files changed

+68
-43
lines changed

3 files changed

+68
-43
lines changed

Include/internal/pycore_qsbr.h

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,14 @@ struct _qsbr_thread_state {
5555
// the last write sequence advance
5656
size_t deferred_memory;
5757

58-
// Sequence number at time of last "should process" check.
59-
uint64_t seq_last_check;
58+
// Amount of memory in mimalloc pages deferred from collection. When
59+
// deferred, they are prevented from being used for a different size class
60+
// and in a different thread.
61+
size_t deferred_page_memory;
62+
63+
// If non-zero, processing if deferred memory should be performed if the
64+
// read sequence has reached this value.
65+
uint64_t process_seq;
6066

6167
// Is this thread state allocated?
6268
bool allocated;
@@ -66,7 +72,7 @@ struct _qsbr_thread_state {
6672
// Padding to avoid false sharing
6773
struct _qsbr_pad {
6874
struct _qsbr_thread_state qsbr;
69-
char __padding[64 - sizeof(struct _qsbr_thread_state)];
75+
char __padding[128 - sizeof(struct _qsbr_thread_state)];
7076
};
7177

7278
// Per-interpreter state
@@ -117,14 +123,19 @@ extern uint64_t
117123
_Py_qsbr_advance(struct _qsbr_shared *shared);
118124

119125
// Advance the write sequence as required and return the sequence goal to use
120-
// for memory to be freed. If the sequence is advanced, this goal is the new
121-
// sequence value, otherwise it is the next sequence value. In either case,
122-
// the goal is higher that any write sequence value already observed by readers.
123-
//
124-
// The 'size' argument is the size in bytes of the memory scheduled to be
125-
// freed. If that size is not available, pass zero as the value.
126+
// for memory to be freed. The 'free_size' argument is the size in bytes of
127+
// the memory scheduled to be freed. If that size is not available, pass zero
128+
// as the value.
129+
extern uint64_t
130+
_Py_qsbr_deferred_advance_for_free(struct _qsbr_thread_state *qsbr,
131+
size_t free_size);
132+
133+
// Advance the write sequence as required and return the sequence goal to use
134+
// for a mimalloc page to be collected. The 'page_size' argument is the size
135+
// of the mimalloc page being deferred from collection.
126136
extern uint64_t
127-
_Py_qsbr_advance_with_size(struct _qsbr_thread_state *qsbr, size_t size);
137+
_Py_qsbr_deferred_advance_for_page(struct _qsbr_thread_state *qsbr,
138+
size_t page_size);
128139

129140
// Return true if memory held by QSBR should be processed to determine if it
130141
// can be safely freed.

Objects/obmalloc.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
141141
page->retire_expire = 0;
142142

143143
size_t bsize = mi_page_block_size(page);
144-
page->qsbr_goal = _Py_qsbr_advance_with_size(tstate->qsbr, page->capacity*bsize);
144+
page->qsbr_goal = _Py_qsbr_deferred_advance_for_page(tstate->qsbr, page->capacity*bsize);
145145

146146
llist_insert_tail(&tstate->mimalloc.page_list, &page->qsbr_node);
147147
return false;
@@ -1203,12 +1203,14 @@ free_delayed(uintptr_t ptr, size_t size)
12031203
}
12041204

12051205
assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
1206-
uint64_t seq = _Py_qsbr_advance_with_size(tstate->qsbr, size);
1206+
uint64_t seq = _Py_qsbr_deferred_advance_for_free(tstate->qsbr, size);
12071207
buf->array[buf->wr_idx].ptr = ptr;
12081208
buf->array[buf->wr_idx].qsbr_goal = seq;
12091209
buf->wr_idx++;
12101210

1211-
_PyMem_ProcessDelayed((PyThreadState *)tstate);
1211+
if (_Py_qsbr_should_process(tstate->qsbr)) {
1212+
_PyMem_ProcessDelayed((PyThreadState *)tstate);
1213+
}
12121214
#endif
12131215
}
12141216

@@ -1227,7 +1229,10 @@ _PyObject_XDecRefDelayed(PyObject *ptr)
12271229
{
12281230
assert(!((uintptr_t)ptr & 0x01));
12291231
if (ptr != NULL) {
1230-
free_delayed(((uintptr_t)ptr)|0x01, 64);
1232+
// We use 0 as the size since we don't have an easy way to know the
1233+
// actual size. If we are freeing many objects, the write sequence
1234+
// will be advanced due to QSBR_DEFERRED_LIMIT.
1235+
free_delayed(((uintptr_t)ptr)|0x01, 0);
12311236
}
12321237
}
12331238
#endif
@@ -1300,11 +1305,8 @@ maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue,
13001305
void
13011306
_PyMem_ProcessDelayed(PyThreadState *tstate)
13021307
{
1303-
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
1304-
if (!_Py_qsbr_should_process(tstate_impl->qsbr)) {
1305-
return;
1306-
}
13071308
PyInterpreterState *interp = tstate->interp;
1309+
_PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
13081310

13091311
// Process thread-local work
13101312
process_queue(&tstate_impl->mem_free_queue, tstate_impl, true, NULL, NULL);

Python/qsbr.c

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,19 @@
4141
// Starting size of the array of qsbr thread states
4242
#define MIN_ARRAY_SIZE 8
4343

44-
// For should_advance_qsbr(): the number of deferred items before advancing
44+
// For deferred advance on free: the number of deferred items before advancing
4545
// the write sequence. This is based on WORK_ITEMS_PER_CHUNK. We ideally
4646
// want to process a chunk before it overflows.
4747
#define QSBR_DEFERRED_LIMIT 127
4848

4949
// If the deferred memory exceeds 1 MiB, we force an advance in the
5050
// shared QSBR sequence number to limit excess memory usage.
51-
#define QSBR_MEM_LIMIT 1024*1024
51+
#define QSBR_FREE_MEM_LIMIT 1024*1024
52+
53+
// If we are deferring collection of more than this amount of memory for
54+
// mimalloc pages, advance the write sequence. Advancing allows these
55+
// pages to be re-used in a different thread or for a different size class.
56+
#define QSBR_PAGE_MEM_LIMIT 4096*10
5257

5358
// Allocate a QSBR thread state from the freelist
5459
static struct _qsbr_thread_state *
@@ -121,43 +126,50 @@ _Py_qsbr_advance(struct _qsbr_shared *shared)
121126
return _Py_atomic_add_uint64(&shared->wr_seq, QSBR_INCR);
122127
}
123128

124-
static int
125-
should_advance_qsbr(struct _qsbr_thread_state *qsbr, size_t size)
129+
uint64_t
130+
_Py_qsbr_deferred_advance_for_page(struct _qsbr_thread_state *qsbr, size_t page_size)
126131
{
127-
qsbr->deferred_count++;
128-
qsbr->deferred_memory += size;
129-
if (qsbr->deferred_count >= QSBR_DEFERRED_LIMIT ||
130-
qsbr->deferred_memory > QSBR_MEM_LIMIT) {
131-
qsbr->deferred_count = 0;
132-
qsbr->deferred_memory = 0;
133-
return 1;
132+
qsbr->deferred_page_memory += page_size;
133+
if (qsbr->deferred_page_memory > QSBR_PAGE_MEM_LIMIT) {
134+
qsbr->deferred_page_memory = 0;
135+
// Advance the write sequence and return the updated value as the goal.
136+
return _Py_qsbr_advance(qsbr->shared);
134137
}
135-
return 0;
138+
// Don't advance, return the next sequence value as the goal.
139+
return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
136140
}
137141

138142
uint64_t
139-
_Py_qsbr_advance_with_size(struct _qsbr_thread_state *qsbr, size_t size)
143+
_Py_qsbr_deferred_advance_for_free(struct _qsbr_thread_state *qsbr, size_t free_size)
140144
{
141-
if (should_advance_qsbr(qsbr, size)) {
142-
// Advance the write sequence and return the updated value as the goal.
143-
return _Py_qsbr_advance(qsbr->shared);
144-
}
145-
else {
146-
// Don't advance, return the next sequence value as the goal.
147-
return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
145+
qsbr->deferred_count++;
146+
qsbr->deferred_memory += free_size;
147+
if (qsbr->deferred_count >= QSBR_DEFERRED_LIMIT ||
148+
qsbr->deferred_memory > QSBR_FREE_MEM_LIMIT) {
149+
qsbr->deferred_count = 0;
150+
qsbr->deferred_memory = 0;
151+
// Advance the write sequence
152+
uint64_t seq = _Py_qsbr_advance(qsbr->shared);
153+
if (qsbr->process_seq == 0) {
154+
// Process the queue of deferred frees when the read sequence
155+
// reaches this value. We don't process immediately because
156+
// we want to give readers a chance to advance their sequence.
157+
qsbr->process_seq = seq;
158+
}
159+
// Return current (just advanced) sequence as the goal.
160+
return seq;
148161
}
162+
// Don't advance, return the next sequence value as the goal.
163+
return _Py_qsbr_shared_current(qsbr->shared) + QSBR_INCR;
149164
}
150165

151166
bool
152167
_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
153168
{
154-
if (qsbr->seq_last_check == qsbr->seq) {
155-
// If 'seq' for this thread hasn't advanced, it is unlikely that any
156-
// deferred memory is ready to be freed. Wait longer before trying
157-
// to process.
169+
if (qsbr->process_seq == 0 || qsbr->seq < qsbr->process_seq) {
158170
return false;
159171
}
160-
qsbr->seq_last_check = qsbr->seq;
172+
qsbr->process_seq = 0;
161173
return true;
162174
}
163175

0 commit comments

Comments
 (0)