@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255    assert (state -> type  ==  NULL );
256256    state -> type  =  self ;
257257    state -> isbuiltin  =  isbuiltin ;
258+ #ifdef  Py_GIL_DISABLED 
259+     state -> local_cache .tp_version_tag  =  self -> tp_version_tag ;
260+ #endif 
258261
259262    /* state->tp_subclasses is left NULL until init_subclasses() sets it. */ 
260263    /* state->tp_weaklist is left NULL until insert_head() or insert_after() 
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293    assert (state -> type  !=  NULL );
291294    state -> type  =  NULL ;
292295    assert (state -> tp_weaklist  ==  NULL );  // It was already cleared out. 
296+ #ifdef  Py_GIL_DISABLED 
297+     for  (Py_ssize_t  i  =  0 ; i < LOCAL_TYPE_CACHE_SIZE ; i ++ ) {
298+         Py_CLEAR (state -> local_cache .entries [i ].name );
299+         state -> local_cache .entries [i ].value  =  NULL ;
300+     }
301+ #endif 
293302
294303    (void )_Py_atomic_add_int64 (
295304            & _PyRuntime .types .managed_static .types [full_index ].interp_count , -1 );
@@ -1021,6 +1030,57 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030#endif 
10221031}
10231032
/* Invalidate a heap type's specialization caches after the type is modified.
 *
 * Clears the `_spec_cache.getitem` fast path and, under free threading, also
 * tears down the per-type local method cache.  No-op for non-heap types.
 */
static void
clear_spec_cache(PyTypeObject *type)
{
    if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
        // This field *must* be invalidated if the type is modified (see the
        // comment on struct _specialization_cache):
        PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
        FT_ATOMIC_STORE_PTR_RELAXED(
            heap_type->_spec_cache.getitem, NULL);
#ifdef Py_GIL_DISABLED
        struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
        if (cache != NULL) {
            // Unpublish the cache first so new readers can no longer find it.
            FT_ATOMIC_STORE_PTR_RELAXED(
                heap_type->_spec_cache.local_type_cache, NULL);

            for (Py_ssize_t i = 0; i < LOCAL_TYPE_CACHE_SIZE; i++) {
                // Release-store NULL into the name slot before dropping our
                // reference: concurrent lock-free readers key off `name`.
                PyObject *name = _Py_atomic_load_ptr_relaxed(&cache->entries[i].name);
                if (name != NULL) {
                    _Py_atomic_store_ptr_release(&cache->entries[i].name, NULL);
                    Py_DECREF(name);
                }
            }
            // Delayed free: lock-free readers may still hold a pointer to
            // `cache`, so the memory must outlive any in-flight reads.
            _PyMem_FreeDelayed(cache);
        }
#endif
    }
}
1060+ 
1061+ static  void 
1062+ clear_spec_cache_recursive (PyTypeObject  * type )
1063+ {
1064+     if  (PyType_HasFeature (type , Py_TPFLAGS_HEAPTYPE )) {
1065+         clear_spec_cache (type );
1066+         PyObject  * subclasses  =  lookup_tp_subclasses (type );
1067+         if  (subclasses  !=  NULL ) {
1068+             assert (PyDict_CheckExact (subclasses ));
1069+ 
1070+             Py_ssize_t  i  =  0 ;
1071+             PyObject  * ref ;
1072+             while  (PyDict_Next (subclasses , & i , NULL , & ref )) {
1073+                 PyTypeObject  * subclass  =  type_from_ref (ref );
1074+                 if  (subclass  ==  NULL ) {
1075+                     continue ;
1076+                 }
1077+                 clear_spec_cache (subclass );
1078+                 Py_DECREF (subclass );
1079+             }
1080+         }
1081+     }
1082+ }
1083+ 
10241084static  void 
10251085type_modified_unlocked (PyTypeObject  * type )
10261086{
@@ -1083,12 +1143,7 @@ type_modified_unlocked(PyTypeObject *type)
10831143    }
10841144
10851145    set_version_unlocked (type , 0 ); /* 0 is not a valid version tag */ 
1086-     if  (PyType_HasFeature (type , Py_TPFLAGS_HEAPTYPE )) {
1087-         // This field *must* be invalidated if the type is modified (see the 
1088-         // comment on struct _specialization_cache): 
1089-         FT_ATOMIC_STORE_PTR_RELAXED (
1090-             ((PyHeapTypeObject  * )type )-> _spec_cache .getitem , NULL );
1091-     }
1146+     clear_spec_cache (type );
10921147}
10931148
10941149void 
@@ -1165,12 +1220,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651220    assert (!(type -> tp_flags  &  _Py_TPFLAGS_STATIC_BUILTIN ));
11661221    set_version_unlocked (type , 0 );  /* 0 is not a valid version tag */ 
11671222    type -> tp_versions_used  =  _Py_ATTR_CACHE_UNUSED ;
1168-     if  (PyType_HasFeature (type , Py_TPFLAGS_HEAPTYPE )) {
1169-         // This field *must* be invalidated if the type is modified (see the 
1170-         // comment on struct _specialization_cache): 
1171-         FT_ATOMIC_STORE_PTR_RELAXED (
1172-             ((PyHeapTypeObject  * )type )-> _spec_cache .getitem , NULL );
1173-     }
1223+     clear_spec_cache (type );
11741224}
11751225
11761226/* 
@@ -5542,6 +5592,121 @@ _PyTypes_AfterFork(void)
55425592#endif 
55435593}
55445594
5595+ #ifdef  Py_GIL_DISABLED 
5596+ 
5597+ static  bool 
5598+ can_cache_locally (PyTypeObject  * type , PyObject  * name ) {
5599+     // We don't cache types in l1 for anything which is a custom get attr, it's likely 
5600+     // to have many dynamic attributes (think modules and metaclasses). 
5601+     // We also only cache interned or immortal strings. 
5602+     return  type -> tp_getattro  ==  PyObject_GenericGetAttr  && 
5603+            (PyUnicode_CHECK_INTERNED (name ) !=  SSTATE_NOT_INTERNED  ||  _Py_IsImmortal (name ));
5604+ }
5605+ 
5606+ static  inline  struct  local_type_cache  * 
5607+ get_local_type_cache (PyTypeObject  * type , unsigned int   assigned_version )
5608+ {
5609+     unsigned long  flags  =  FT_ATOMIC_LOAD_ULONG_RELAXED (type -> tp_flags );
5610+ 
5611+     if  (flags  &  Py_TPFLAGS_HEAPTYPE ) {
5612+         PyHeapTypeObject  * heap_type  =  (PyHeapTypeObject  * )type ;
5613+         struct  local_type_cache  * local_cache  =  _Py_atomic_load_ptr_acquire (& heap_type -> _spec_cache .local_type_cache );
5614+         if  (local_cache  ==  NULL  &&  assigned_version ) {
5615+             local_cache  =  PyMem_Calloc (1 , sizeof (struct  local_type_cache ));
5616+             local_cache -> tp_version_tag  =  assigned_version ;
5617+             _Py_atomic_store_ptr_release (& heap_type -> _spec_cache .local_type_cache , local_cache );
5618+         }
5619+         return  local_cache ;
5620+     } else  if  (flags  &  _Py_TPFLAGS_STATIC_BUILTIN ) {
5621+         PyInterpreterState  * interp  =  _PyInterpreterState_GET ();
5622+         managed_static_type_state  * state  =  managed_static_type_state_get (interp , type );
5623+         return  & state -> local_cache ;
5624+     }
5625+     return  NULL ;
5626+ }
5627+ 
5628+ #define  HASH_NAME (name ) (((Py_ssize_t)(name)) >> 6)
5629+ 
5630+ static  bool 
5631+ try_local_cache_lookup (PyTypeObject  * type , PyObject  * name , PyObject  * * value , unsigned int   * version )
5632+ {
5633+     if  (!can_cache_locally (type , name )) {
5634+         return  false;
5635+     }
5636+ 
5637+     struct  local_type_cache  * local_cache  =  get_local_type_cache (type , 0 );
5638+     if  (local_cache  ==  NULL ) {
5639+         return  false;
5640+     }
5641+ 
5642+     Py_ssize_t  index  =  HASH_NAME (name ) % LOCAL_TYPE_CACHE_SIZE ;
5643+     Py_ssize_t  cur  =  index ;
5644+     do  {
5645+         struct  local_type_cache_entry  * entry  =  & local_cache -> entries [cur ];
5646+         PyObject  * entry_name  =  _Py_atomic_load_ptr_acquire (& entry -> name );
5647+         if  (entry_name  ==  name ) {
5648+             // Value is set as maybe weakref'd, and the per-type cache never replaces 
5649+             // values so we get away w/ a simple incref here. 
5650+             PyObject  * entry_value  =  _Py_atomic_load_ptr_relaxed (& entry -> value );
5651+             if  (entry_value  !=  NULL  &&  !_Py_TryIncref (entry_value )) {
5652+                 break ;
5653+             }
5654+             * value  =  entry_value ;
5655+ 
5656+             if  (version ) {
5657+                 * version  =  _Py_atomic_load_uint_relaxed (& local_cache -> tp_version_tag );
5658+             }
5659+ 
5660+             return  true;
5661+         }
5662+         else  if  (entry_name  ==  NULL ) {
5663+             break ;
5664+         }
5665+         cur  =  (cur  +  LOCAL_TYPE_CACHE_PROBE ) % LOCAL_TYPE_CACHE_SIZE ;
5666+     } while  (cur  !=  index );
5667+     return  false;
5668+ }
5669+ 
/* Insert an MRO lookup result into `type`'s local cache.
 *
 * Called with the type lock held (writers are serialized; readers are
 * lock-free).  `res` may be NULL to cache a negative lookup.  Returns true
 * if the entry was stored, false if the pair is ineligible, the cache is
 * unavailable/full, or no free slot was found in the probe cycle.
 */
static bool
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
                        PyObject *res, unsigned int assigned_version)
{
    // Don't burn cache slots on types that are about to stop getting
    // version tags (tp_versions_used check).
    if (!can_cache_locally(type, name) ||
        type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
        return false;
    }

    // Non-zero version allows lazy allocation of the cache here.
    struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
    if (local_cache == NULL ||
        local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
        return false;
    }

    // Open addressing with fixed-stride probing; give up after a full cycle.
    Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
    Py_ssize_t cur = index;
    do {
        struct local_type_cache_entry *entry = &local_cache->entries[cur];
        PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
        if (entry_name == NULL) {
            if (res != NULL) {
                // Reads from other threads can proceed lock-free.
                _PyObject_SetMaybeWeakref(res);
            }

            // Value is written first, then name, so when name is read the
            // value is always present.  The release store on `name` pairs
            // with the acquire load in try_local_cache_lookup.
            _Py_atomic_store_ptr_relaxed(&entry->value, res);
            _Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
            // Plain increment is safe: writers hold the type lock.
            local_cache->cache_count++;
            return true;
        }
        cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
    } while (cur != index);
    return false;
}
5707+ 
5708+ #endif 
5709+ 
55455710/* Internal API to look for a name through the MRO. 
55465711   This returns a strong reference, and doesn't set an exception! 
55475712   If nonzero, version is set to the value of type->tp_version at the time of 
@@ -5551,13 +5716,22 @@ PyObject *
55515716_PyType_LookupRefAndVersion (PyTypeObject  * type , PyObject  * name , unsigned int   * version )
55525717{
55535718    PyObject  * res ;
5719+ 
5720+ #ifdef  Py_GIL_DISABLED 
5721+     // Free-threaded, try a completely lock-free per-type L1 cache first 
5722+     if  (try_local_cache_lookup (type , name , & res , version )) {
5723+         return  res ;
5724+     }
5725+ #endif 
5726+ 
55545727    int  error ;
55555728    PyInterpreterState  * interp  =  _PyInterpreterState_GET ();
5556- 
55575729    unsigned int   h  =  MCACHE_HASH_METHOD (type , name );
55585730    struct  type_cache  * cache  =  get_type_cache ();
55595731    struct  type_cache_entry  * entry  =  & cache -> hashtable [h ];
5732+ 
55605733#ifdef  Py_GIL_DISABLED 
5734+     // Fall back to global L2 cache which requires sequence locks 
55615735    // synchronize-with other writing threads by doing an acquire load on the sequence 
55625736    while  (1 ) {
55635737        uint32_t  sequence  =  _PySeqLock_BeginRead (& entry -> sequence );
@@ -5574,6 +5748,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745748                    if  (version  !=  NULL ) {
55755749                        * version  =  entry_version ;
55765750                    }
5751+ 
55775752                    return  value ;
55785753                }
55795754                Py_XDECREF (value );
@@ -5612,12 +5787,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125787
56135788    int  has_version  =  0 ;
56145789    unsigned int   assigned_version  =  0 ;
5790+ 
5791+     bool  locally_cached  =  false;
56155792    BEGIN_TYPE_LOCK ();
5793+ 
56165794    res  =  find_name_in_mro (type , name , & error );
56175795    if  (MCACHE_CACHEABLE_NAME (name )) {
56185796        has_version  =  assign_version_tag (interp , type );
56195797        assigned_version  =  type -> tp_version_tag ;
56205798    }
5799+ 
5800+ #ifdef  Py_GIL_DISABLED 
5801+     locally_cached  =  has_version  &&  !error  && 
5802+                      cache_local_type_lookup (type , name , res , assigned_version );
5803+ #endif 
56215804    END_TYPE_LOCK ();
56225805
56235806    /* Only put NULL results into cache if there was no error. */ 
@@ -5640,9 +5823,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405823        return  NULL ;
56415824    }
56425825
5643-     if  (has_version ) {
5826+     if  (has_version   &&  ! locally_cached ) {
56445827#if  Py_GIL_DISABLED 
56455828        update_cache_gil_disabled (entry , name , assigned_version , res );
5829+ 
56465830#else 
56475831        PyObject  * old_value  =  update_cache (entry , name , assigned_version , res );
56485832        Py_DECREF (old_value );
@@ -5907,6 +6091,8 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
59076091        return  -1 ;
59086092    }
59096093
6094+     clear_spec_cache_recursive (type );
6095+ 
59106096    /* Clear the VALID_VERSION flag of 'type' and all its 
59116097        subclasses.  This could possibly be unified with the 
59126098        update_subclasses() recursion in update_slot(), but carefully: 
@@ -6164,6 +6350,7 @@ type_dealloc(PyObject *self)
61646350    }
61656351    Py_XDECREF (et -> ht_module );
61666352    PyMem_Free (et -> _ht_tpname );
6353+     clear_spec_cache (type );
61676354#ifdef  Py_GIL_DISABLED 
61686355    assert (et -> unique_id  ==  _Py_INVALID_UNIQUE_ID );
61696356#endif 
0 commit comments