@@ -218,92 +218,131 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname
218218}
219219
220220/*
221- Function versions
222- -----------------
221+ (This is purely internal documentation. There are no public APIs here.)
223222
224- Function versions are used to detect when a function object has been
225- updated, invalidating inline cache data used by the `CALL` bytecode
226- (notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations).
223+ Function (and code) versions
224+ ----------------------------
227225
228- They are also used by the Tier 2 superblock creation code to find
229- the function being called (and from there the code object).
226+ The Tier 1 specializer generates CALL variants that can be invalidated
227+ by changes to critical function attributes:
230228
231- How does a function's `func_version` field get initialized?
229+ - __code__
230+ - __defaults__
231+ - __kwdefaults__
232+ - __closure__
232233
233- - `PyFunction_New` and friends initialize it to 0.
234- - The `MAKE_FUNCTION` instruction sets it from the code's `co_version`.
235- - It is reset to 0 when various attributes like `__code__` are set.
236- - A new version is allocated by `_PyFunction_GetVersionForCurrentState`
237- when the specializer needs a version and the version is 0.
234+ For this purpose function objects have a 32-bit func_version member
235+ that the specializer writes to the specialized instruction's inline
236+ cache and which is checked by a guard on the specialized instructions.
238237
239- The latter allocates versions using a counter in the interpreter state,
240- `interp->func_state.next_version`.
241- When the counter wraps around to 0, no more versions are allocated.
242- There is one other special case: functions with a non-standard
243- `vectorcall` field are not given a version.
238+ The MAKE_FUNCTION bytecode sets func_version from the code object's
239+ co_version field. The latter is initialized from a counter in the
240+ interpreter state (interp->func_state.next_version) and never changes.
241+ When this counter overflows, it remains zero and the specializer loses
242+ the ability to specialize calls to new functions.
244243
245- When the function version is 0, the `CALL` bytecode is not specialized.
244+ The func_version is reset to zero when any of the critical attributes
245+ is modified; after this point the specializer will no longer specialize
246+ calls to this function, and the guard will always fail.
246247
247- Code object versions
248- --------------------
248+ The function and code version cache
249+ -----------------------------------
249250
250- So where to code objects get their `co_version`?
251- They share the same counter, `interp->func_state.next_version`.
251+ The Tier 2 optimizer now has a problem, since it needs to find the
252+ function and code objects given only the version number from the inline
253+ cache. Our solution is to maintain a cache mapping version numbers to
254+ function and code objects. To limit the cache size we could hash
255+ the version number, but for now we simply use it modulo the table size.
256+
257+ There are some corner cases (e.g. generator expressions) where we will
258+ be unable to find the function object in the cache but we can still
259+ find the code object. For this reason the cache stores both the
260+ function object and the code object.
261+
262+ The cache doesn't contain strong references; cache entries are
263+ invalidated whenever the function or code object is deallocated.
264+
265+ Invariants
266+ ----------
267+
268+ These should hold at any time except when one of the cache-mutating
269+ functions is running.
270+
271+ - For any slot s at index i:
272+ - s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i
273+ - s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i
274+ if s->func != NULL, then s->func->func_code == s->code
252275
253- Code objects get a new `co_version` allocated from this counter upon
254- creation. Since code objects are nominally immutable, `co_version` can
255- not be invalidated. The only way it can be 0 is when 2**32 or more
256- code objects have been created during the process's lifetime.
257- (The counter isn't reset by `fork()`, extending the lifetime.)
258276*/
259277
260278void
261279_PyFunction_SetVersion (PyFunctionObject * func , uint32_t version )
262280{
263281 PyInterpreterState * interp = _PyInterpreterState_GET ();
264282 if (func -> func_version != 0 ) {
265- PyFunctionObject * * slot =
283+ struct _func_version_cache_item * slot =
266284 interp -> func_state .func_version_cache
267285 + (func -> func_version % FUNC_VERSION_CACHE_SIZE );
268- if (* slot == func ) {
269- * slot = NULL ;
286+ if (slot -> func == func ) {
287+ slot -> func = NULL ;
288+ // Leave slot->code alone, there may be use for it.
270289 }
271290 }
272291 func -> func_version = version ;
273292 if (version != 0 ) {
274- interp -> func_state .func_version_cache [
275- version % FUNC_VERSION_CACHE_SIZE ] = func ;
293+ struct _func_version_cache_item * slot =
294+ interp -> func_state .func_version_cache
295+ + (version % FUNC_VERSION_CACHE_SIZE );
296+ slot -> func = func ;
297+ slot -> code = func -> func_code ;
298+ }
299+ }
300+
301+ void
302+ _PyFunction_ClearCodeByVersion (uint32_t version )
303+ {
304+ PyInterpreterState * interp = _PyInterpreterState_GET ();
305+ struct _func_version_cache_item * slot =
306+ interp -> func_state .func_version_cache
307+ + (version % FUNC_VERSION_CACHE_SIZE );
308+ if (slot -> code ) {
309+ assert (PyCode_Check (slot -> code ));
310+ PyCodeObject * code = (PyCodeObject * )slot -> code ;
311+ if (code -> co_version == version ) {
312+ slot -> code = NULL ;
313+ slot -> func = NULL ;
314+ }
276315 }
277316}
278317
279318PyFunctionObject *
280- _PyFunction_LookupByVersion (uint32_t version )
319+ _PyFunction_LookupByVersion (uint32_t version , PyObject * * p_code )
281320{
282321 PyInterpreterState * interp = _PyInterpreterState_GET ();
283- PyFunctionObject * func = interp -> func_state .func_version_cache [
284- version % FUNC_VERSION_CACHE_SIZE ];
285- if (func != NULL && func -> func_version == version ) {
286- return func ;
322+ struct _func_version_cache_item * slot =
323+ interp -> func_state .func_version_cache
324+ + (version % FUNC_VERSION_CACHE_SIZE );
325+ if (slot -> code ) {
326+ assert (PyCode_Check (slot -> code ));
327+ PyCodeObject * code = (PyCodeObject * )slot -> code ;
328+ if (code -> co_version == version ) {
329+ * p_code = slot -> code ;
330+ }
331+ }
332+ else {
333+ * p_code = NULL ;
334+ }
335+ if (slot -> func && slot -> func -> func_version == version ) {
336+ assert (slot -> func -> func_code == slot -> code );
337+ return slot -> func ;
287338 }
288339 return NULL ;
289340}
290341
291342uint32_t
292343_PyFunction_GetVersionForCurrentState (PyFunctionObject * func )
293344{
294- if (func -> func_version != 0 ) {
295- return func -> func_version ;
296- }
297- if (func -> vectorcall != _PyFunction_Vectorcall ) {
298- return 0 ;
299- }
300- PyInterpreterState * interp = _PyInterpreterState_GET ();
301- if (interp -> func_state .next_version == 0 ) {
302- return 0 ;
303- }
304- uint32_t v = interp -> func_state .next_version ++ ;
305- _PyFunction_SetVersion (func , v );
306- return v ;
345+ return func -> func_version ;
307346}
308347
309348PyObject *
@@ -507,7 +546,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations)
507546 "non-dict annotations" );
508547 return -1 ;
509548 }
510- _PyFunction_SetVersion ((PyFunctionObject * )op , 0 );
511549 Py_XSETREF (((PyFunctionObject * )op )-> func_annotations , annotations );
512550 return 0 ;
513551}
@@ -731,7 +769,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno
731769 "__annotations__ must be set to a dict object" );
732770 return -1 ;
733771 }
734- _PyFunction_SetVersion (op , 0 );
735772 Py_XSETREF (op -> func_annotations , Py_XNewRef (value ));
736773 return 0 ;
737774}
0 commit comments