@@ -123,6 +123,33 @@ _PyMem_mi_page_is_safe_to_free(mi_page_t *page)
123123
124124}
125125
126+ #ifdef  Py_GIL_DISABLED 
127+ 
128+ // If we are deferring collection of more than this amount of memory for 
129+ // mimalloc pages, advance the write sequence.  Advancing allows these 
130+ // pages to be re-used in a different thread or for a different size class. 
131+ #define  QSBR_PAGE_MEM_LIMIT  4096*20
132+ 
133+ // Return true if the global write sequence should be advanced for a mimalloc 
134+ // page that is deferred from collection. 
135+ static  bool 
136+ should_advance_qsbr_for_page (struct  _qsbr_thread_state  * qsbr , mi_page_t  * page )
137+ {
138+     size_t  bsize  =  mi_page_block_size (page );
139+     size_t  page_size  =  page -> capacity * bsize ;
140+     if  (page_size  >  QSBR_PAGE_MEM_LIMIT ) {
141+         qsbr -> deferred_page_memory  =  0 ;
142+         return  true;
143+     }
144+     qsbr -> deferred_page_memory  +=  page_size ;
145+     if  (qsbr -> deferred_page_memory  >  QSBR_PAGE_MEM_LIMIT ) {
146+         qsbr -> deferred_page_memory  =  0 ;
147+         return  true;
148+     }
149+     return  false;
150+ }
151+ #endif 
152+ 
126153static  bool 
127154_PyMem_mi_page_maybe_free (mi_page_t  * page , mi_page_queue_t  * pq , bool  force )
128155{
@@ -138,7 +165,14 @@ _PyMem_mi_page_maybe_free(mi_page_t *page, mi_page_queue_t *pq, bool force)
138165
139166        _PyMem_mi_page_clear_qsbr (page );
140167        page -> retire_expire  =  0 ;
141-         page -> qsbr_goal  =  _Py_qsbr_deferred_advance (tstate -> qsbr );
168+ 
169+         if  (should_advance_qsbr_for_page (tstate -> qsbr , page )) {
170+             page -> qsbr_goal  =  _Py_qsbr_advance (tstate -> qsbr -> shared );
171+         }
172+         else  {
173+             page -> qsbr_goal  =  _Py_qsbr_shared_next (tstate -> qsbr -> shared );
174+         }
175+ 
142176        llist_insert_tail (& tstate -> mimalloc .page_list , & page -> qsbr_node );
143177        return  false;
144178    }
@@ -1103,8 +1137,44 @@ free_work_item(uintptr_t ptr)
11031137    }
11041138}
11051139
1140+ 
1141+ #ifdef  Py_GIL_DISABLED 
1142+ 
1143+ // For deferred advance on free: the number of deferred items before advancing 
1144+ // the write sequence.  This is based on WORK_ITEMS_PER_CHUNK.  We ideally 
1145+ // want to process a chunk before it overflows. 
1146+ #define  QSBR_DEFERRED_LIMIT  127
1147+ 
1148+ // If the deferred memory exceeds 1 MiB, advance the write sequence.  This 
1149+ // helps limit memory usage due to QSBR delaying frees too long. 
1150+ #define  QSBR_FREE_MEM_LIMIT  1024*1024
1151+ 
1152+ // Return true if the global write sequence should be advanced for a deferred 
1153+ // memory free. 
1154+ static  bool 
1155+ should_advance_qsbr_for_free (struct  _qsbr_thread_state  * qsbr , size_t  size )
1156+ {
1157+     if  (size  >  QSBR_FREE_MEM_LIMIT ) {
1158+         qsbr -> deferred_count  =  0 ;
1159+         qsbr -> deferred_memory  =  0 ;
1160+         qsbr -> should_process  =  true;
1161+         return  true;
1162+     }
1163+     qsbr -> deferred_count ++ ;
1164+     qsbr -> deferred_memory  +=  size ;
1165+     if  (qsbr -> deferred_count  >  QSBR_DEFERRED_LIMIT  || 
1166+             qsbr -> deferred_memory  >  QSBR_FREE_MEM_LIMIT ) {
1167+         qsbr -> deferred_count  =  0 ;
1168+         qsbr -> deferred_memory  =  0 ;
1169+         qsbr -> should_process  =  true;
1170+         return  true;
1171+     }
1172+     return  false;
1173+ }
1174+ #endif 
1175+ 
11061176static  void 
1107- free_delayed (uintptr_t  ptr )
1177+ free_delayed (uintptr_t  ptr ,  size_t   size )
11081178{
11091179#ifndef  Py_GIL_DISABLED 
11101180    free_work_item (ptr );
@@ -1145,31 +1215,43 @@ free_delayed(uintptr_t ptr)
11451215    }
11461216
11471217    assert (buf  !=  NULL  &&  buf -> wr_idx  <  WORK_ITEMS_PER_CHUNK );
1148-     uint64_t  seq  =  _Py_qsbr_deferred_advance (tstate -> qsbr );
1218+     uint64_t  seq ;
1219+     if  (should_advance_qsbr_for_free (tstate -> qsbr , size )) {
1220+         seq  =  _Py_qsbr_advance (tstate -> qsbr -> shared );
1221+     }
1222+     else  {
1223+         seq  =  _Py_qsbr_shared_next (tstate -> qsbr -> shared );
1224+     }
11491225    buf -> array [buf -> wr_idx ].ptr  =  ptr ;
11501226    buf -> array [buf -> wr_idx ].qsbr_goal  =  seq ;
11511227    buf -> wr_idx ++ ;
11521228
11531229    if  (buf -> wr_idx  ==  WORK_ITEMS_PER_CHUNK ) {
1230+         // Normally the processing of delayed items is done from the eval 
1231+         // breaker.  Processing here is a safety measure to ensure too much 
1232+         // work does not accumulate. 
11541233        _PyMem_ProcessDelayed ((PyThreadState  * )tstate );
11551234    }
11561235#endif 
11571236}
11581237
// Queue a PyMem-domain pointer for QSBR-deferred freeing.  `size` is the
// allocation size in bytes, used to decide when the write sequence must
// be advanced.  NULL is a no-op.
void
_PyMem_FreeDelayed(void *ptr, size_t size)
{
    // Low bit must be clear: it tags object-domain frees (see
    // _PyObject_FreeDelayed).
    assert(!((uintptr_t)ptr & 0x01));
    if (ptr == NULL) {
        return;
    }
    free_delayed((uintptr_t)ptr, size);
}
11671246
// Queue an object-domain pointer for QSBR-deferred freeing.  The low bit
// of the stored value tags it as an object free for free_work_item().
void
_PyObject_FreeDelayed(void *ptr)
{
    assert(!((uintptr_t)ptr & 0x01));
    // Pass size 0: the actual allocation size is not readily available
    // here.  Frequent object frees still advance the write sequence via
    // the QSBR_DEFERRED_LIMIT item counter.
    uintptr_t tagged = ((uintptr_t)ptr) | 0x01;
    free_delayed(tagged, 0);
}
11741256
11751257static  struct  _mem_work_chunk  * 
@@ -1239,6 +1321,8 @@ _PyMem_ProcessDelayed(PyThreadState *tstate)
12391321    PyInterpreterState  * interp  =  tstate -> interp ;
12401322    _PyThreadStateImpl  * tstate_impl  =  (_PyThreadStateImpl  * )tstate ;
12411323
1324+     tstate_impl -> qsbr -> should_process  =  false;
1325+ 
12421326    // Process thread-local work 
12431327    process_queue (& tstate_impl -> mem_free_queue , tstate_impl -> qsbr , true);
12441328
0 commit comments