Skip to content

Commit e7db011

Browse files
committed
Add type-local type cache
1 parent bcc9a5d commit e7db011

File tree

4 files changed

+231
-14
lines changed

4 files changed

+231
-14
lines changed

Include/cpython/object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ struct _specialization_cache {
259259
PyObject *getitem;
260260
uint32_t getitem_version;
261261
PyObject *init;
262+
#ifdef Py_GIL_DISABLED
263+
struct local_type_cache *local_type_cache;
264+
#endif
262265
};
263266

264267
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_typeobject.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ struct type_cache {
7171
struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
7272
};
7373

74+
#ifdef Py_GIL_DISABLED
75+
76+
// Type attribute lookup cache which is type-specific. Only used
77+
// for heap types where we store a small additional cache in free-threaded
78+
// builds which can be accessed without any locking.
79+
// Number of slots in the `entries` array of each type-local cache.
#define LOCAL_TYPE_CACHE_SIZE 32
80+
// Once `cache_count` reaches this value no further entries are inserted, so
// the table is never filled completely.
#define LOCAL_TYPE_CACHE_MAX_ENTRIES 24
81+
// Stride (in slots, modulo LOCAL_TYPE_CACHE_SIZE) used to move to the next
// candidate slot when probing the table.
#define LOCAL_TYPE_CACHE_PROBE 3
82+
83+
// One slot of a type-local attribute cache. A slot is empty while `name` is
// NULL; writers store `value` first and publish `name` last.
struct local_type_cache_entry {
84+
// Strong reference to the attribute name (a str), or NULL for an empty slot.
PyObject *name;
85+
// Cached lookup result, or NULL when the lookup found nothing.
// NOTE(review): this was described as an "owned reference", but the insert
// path stores the object without taking a new reference and the clearing
// code never decrefs it -- confirm the intended ownership.
PyObject *value;
86+
};
87+
88+
// Small open-addressed table of attribute lookups for a single type.
struct local_type_cache {
89+
// Type version reported to callers together with a cache hit.
unsigned int tp_version_tag;
90+
// Number of entries inserted so far; compared against
// LOCAL_TYPE_CACHE_MAX_ENTRIES before inserting.
unsigned int cache_count;
91+
// Slots, indexed by a hash of the name pointer and probed with a stride of
// LOCAL_TYPE_CACHE_PROBE.
struct local_type_cache_entry entries[LOCAL_TYPE_CACHE_SIZE];
92+
};
93+
94+
#endif
95+
7496
typedef struct {
7597
PyTypeObject *type;
7698
int isbuiltin;
@@ -85,6 +107,9 @@ typedef struct {
85107
are also some diagnostic uses for the list of weakrefs,
86108
so we still keep it. */
87109
PyObject *tp_weaklist;
110+
#ifdef Py_GIL_DISABLED
111+
struct local_type_cache local_cache;
112+
#endif
88113
} managed_static_type_state;
89114

90115
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */

Lib/test/test_sys.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def delx(self): del self.__x
17311731
s = vsize(fmt)
17321732
check(int, s)
17331733
typeid = 'n' if support.Py_GIL_DISABLED else ''
1734+
local_type_cache = 'P' if support.Py_GIL_DISABLED else ''
17341735
# class
17351736
s = vsize(fmt + # PyTypeObject
17361737
'4P' # PyAsyncMethods
@@ -1741,6 +1742,7 @@ def delx(self): del self.__x
17411742
'7P'
17421743
'1PIP' # Specializer cache
17431744
+ typeid # heap type id (free-threaded only)
1745+
+ local_type_cache # local type cache (free-threaded only)
17441746
)
17451747
class newstyleclass(object): pass
17461748
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Objects/typeobject.c

Lines changed: 201 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255
assert(state->type == NULL);
256256
state->type = self;
257257
state->isbuiltin = isbuiltin;
258+
#ifdef Py_GIL_DISABLED
259+
state->local_cache.tp_version_tag = self->tp_version_tag;
260+
#endif
258261

259262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293
assert(state->type != NULL);
291294
state->type = NULL;
292295
assert(state->tp_weaklist == NULL); // It was already cleared out.
296+
#ifdef Py_GIL_DISABLED
297+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
298+
Py_CLEAR(state->local_cache.entries[i].name);
299+
state->local_cache.entries[i].value = NULL;
300+
}
301+
#endif
293302

294303
(void)_Py_atomic_add_int64(
295304
&_PyRuntime.types.managed_static.types[full_index].interp_count, -1);
@@ -1021,6 +1030,57 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030
#endif
10221031
}
10231032

1033+
static void
1034+
clear_spec_cache(PyTypeObject *type)
1035+
{
1036+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1037+
// This field *must* be invalidated if the type is modified (see the
1038+
// comment on struct _specialization_cache):
1039+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
1040+
FT_ATOMIC_STORE_PTR_RELAXED(
1041+
heap_type->_spec_cache.getitem, NULL);
1042+
#ifdef Py_GIL_DISABLED
1043+
struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
1044+
if (cache != NULL) {
1045+
FT_ATOMIC_STORE_PTR_RELAXED(
1046+
heap_type->_spec_cache.local_type_cache, NULL);
1047+
1048+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
1049+
PyObject *name = _Py_atomic_load_ptr_relaxed(&cache->entries[i].name);
1050+
if (name != NULL) {
1051+
_Py_atomic_store_ptr_release(&cache->entries[i].name, NULL);
1052+
Py_DECREF(name);
1053+
}
1054+
}
1055+
_PyMem_FreeDelayed(cache);
1056+
}
1057+
#endif
1058+
}
1059+
}
1060+
1061+
static void
1062+
clear_spec_cache_recursive(PyTypeObject *type)
1063+
{
1064+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1065+
clear_spec_cache(type);
1066+
PyObject *subclasses = lookup_tp_subclasses(type);
1067+
if (subclasses != NULL) {
1068+
assert(PyDict_CheckExact(subclasses));
1069+
1070+
Py_ssize_t i = 0;
1071+
PyObject *ref;
1072+
while (PyDict_Next(subclasses, &i, NULL, &ref)) {
1073+
PyTypeObject *subclass = type_from_ref(ref);
1074+
if (subclass == NULL) {
1075+
continue;
1076+
}
1077+
clear_spec_cache(subclass);
1078+
Py_DECREF(subclass);
1079+
}
1080+
}
1081+
}
1082+
}
1083+
10241084
static void
10251085
type_modified_unlocked(PyTypeObject *type)
10261086
{
@@ -1083,12 +1143,7 @@ type_modified_unlocked(PyTypeObject *type)
10831143
}
10841144

10851145
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
1086-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1087-
// This field *must* be invalidated if the type is modified (see the
1088-
// comment on struct _specialization_cache):
1089-
FT_ATOMIC_STORE_PTR_RELAXED(
1090-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1091-
}
1146+
clear_spec_cache(type);
10921147
}
10931148

10941149
void
@@ -1165,12 +1220,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651220
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
11661221
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
11671222
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
1168-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1169-
// This field *must* be invalidated if the type is modified (see the
1170-
// comment on struct _specialization_cache):
1171-
FT_ATOMIC_STORE_PTR_RELAXED(
1172-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1173-
}
1223+
clear_spec_cache(type);
11741224
}
11751225

11761226
/*
@@ -5542,6 +5592,121 @@ _PyTypes_AfterFork(void)
55425592
#endif
55435593
}
55445594

5595+
#ifdef Py_GIL_DISABLED
5596+
5597+
static bool
5598+
can_cache_locally(PyTypeObject *type, PyObject *name) {
5599+
// We don't cache types in l1 for anything which is a custom get attr, it's likely
5600+
// to have many dynamic attributes (think modules and metaclasses).
5601+
// We also only cache interned or immortal strings.
5602+
return type->tp_getattro == PyObject_GenericGetAttr &&
5603+
(PyUnicode_CHECK_INTERNED(name) != SSTATE_NOT_INTERNED || _Py_IsImmortal(name));
5604+
}
5605+
5606+
static inline struct local_type_cache *
5607+
get_local_type_cache(PyTypeObject *type, unsigned int assigned_version)
5608+
{
5609+
unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
5610+
5611+
if (flags & Py_TPFLAGS_HEAPTYPE) {
5612+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
5613+
struct local_type_cache *local_cache = _Py_atomic_load_ptr_acquire(&heap_type->_spec_cache.local_type_cache);
5614+
if (local_cache == NULL && assigned_version) {
5615+
local_cache = PyMem_Calloc(1, sizeof(struct local_type_cache));
5616+
local_cache->tp_version_tag = assigned_version;
5617+
_Py_atomic_store_ptr_release(&heap_type->_spec_cache.local_type_cache, local_cache);
5618+
}
5619+
return local_cache;
5620+
} else if (flags & _Py_TPFLAGS_STATIC_BUILTIN) {
5621+
PyInterpreterState *interp = _PyInterpreterState_GET();
5622+
managed_static_type_state *state = managed_static_type_state_get(interp, type);
5623+
return &state->local_cache;
5624+
}
5625+
return NULL;
5626+
}
5627+
5628+
// Hash a name by its address: drop the low 6 bits of the pointer.  The cast
// goes through an unsigned type so that the shift and the subsequent
// `% LOCAL_TYPE_CACHE_SIZE` can never produce a negative slot index, whatever
// the address is.
#define HASH_NAME(name) (((uintptr_t)(name)) >> 6)
5629+
5630+
static bool
5631+
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
5632+
{
5633+
if (!can_cache_locally(type, name)) {
5634+
return false;
5635+
}
5636+
5637+
struct local_type_cache *local_cache = get_local_type_cache(type, 0);
5638+
if (local_cache == NULL) {
5639+
return false;
5640+
}
5641+
5642+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5643+
Py_ssize_t cur = index;
5644+
do {
5645+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5646+
PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
5647+
if (entry_name == name) {
5648+
// Value is set as maybe weakref'd, and the per-type cache never replaces
5649+
// values so we get away w/ a simple incref here.
5650+
PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
5651+
if (entry_value != NULL && !_Py_TryIncref(entry_value)) {
5652+
break;
5653+
}
5654+
*value = entry_value;
5655+
5656+
if (version) {
5657+
*version = _Py_atomic_load_uint_relaxed(&local_cache->tp_version_tag);
5658+
}
5659+
5660+
return true;
5661+
}
5662+
else if (entry_name == NULL) {
5663+
break;
5664+
}
5665+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5666+
} while (cur != index);
5667+
return false;
5668+
}
5669+
5670+
static bool
5671+
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
5672+
PyObject *res, unsigned int assigned_version)
5673+
{
5674+
if (!can_cache_locally(type, name) ||
5675+
type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
5676+
return false;
5677+
}
5678+
5679+
struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
5680+
if (local_cache == NULL ||
5681+
local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
5682+
return false;
5683+
}
5684+
5685+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5686+
Py_ssize_t cur = index;
5687+
do {
5688+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5689+
PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
5690+
if (entry_name == NULL) {
5691+
if (res != NULL) {
5692+
// Reads from other threads can proceed lock-free.
5693+
_PyObject_SetMaybeWeakref(res);
5694+
}
5695+
5696+
// Value is written first, then name, so when name is read the
5697+
// value is always present.
5698+
_Py_atomic_store_ptr_relaxed(&entry->value, res);
5699+
_Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
5700+
local_cache->cache_count++;
5701+
return true;
5702+
}
5703+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5704+
} while (cur != index);
5705+
return false;
5706+
}
5707+
5708+
#endif
5709+
55455710
/* Internal API to look for a name through the MRO.
55465711
This returns a strong reference, and doesn't set an exception!
55475712
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5716,22 @@ PyObject *
55515716
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55525717
{
55535718
PyObject *res;
5719+
5720+
#ifdef Py_GIL_DISABLED
5721+
// Free-threaded, try a completely lock-free per-type L1 cache first
5722+
if (try_local_cache_lookup(type, name, &res, version)) {
5723+
return res;
5724+
}
5725+
#endif
5726+
55545727
int error;
55555728
PyInterpreterState *interp = _PyInterpreterState_GET();
5556-
55575729
unsigned int h = MCACHE_HASH_METHOD(type, name);
55585730
struct type_cache *cache = get_type_cache();
55595731
struct type_cache_entry *entry = &cache->hashtable[h];
5732+
55605733
#ifdef Py_GIL_DISABLED
5734+
// Fall back to global L2 cache which requires sequence locks
55615735
// synchronize-with other writing threads by doing an acquire load on the sequence
55625736
while (1) {
55635737
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
@@ -5574,6 +5748,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745748
if (version != NULL) {
55755749
*version = entry_version;
55765750
}
5751+
55775752
return value;
55785753
}
55795754
Py_XDECREF(value);
@@ -5612,12 +5787,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125787

56135788
int has_version = 0;
56145789
unsigned int assigned_version = 0;
5790+
5791+
bool locally_cached = false;
56155792
BEGIN_TYPE_LOCK();
5793+
56165794
res = find_name_in_mro(type, name, &error);
56175795
if (MCACHE_CACHEABLE_NAME(name)) {
56185796
has_version = assign_version_tag(interp, type);
56195797
assigned_version = type->tp_version_tag;
56205798
}
5799+
5800+
#ifdef Py_GIL_DISABLED
5801+
locally_cached = has_version && !error &&
5802+
cache_local_type_lookup(type, name, res, assigned_version);
5803+
#endif
56215804
END_TYPE_LOCK();
56225805

56235806
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5823,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405823
return NULL;
56415824
}
56425825

5643-
if (has_version) {
5826+
if (has_version && !locally_cached) {
56445827
#if Py_GIL_DISABLED
56455828
update_cache_gil_disabled(entry, name, assigned_version, res);
5829+
56465830
#else
56475831
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56485832
Py_DECREF(old_value);
@@ -5907,6 +6091,8 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
59076091
return -1;
59086092
}
59096093

6094+
clear_spec_cache_recursive(type);
6095+
59106096
/* Clear the VALID_VERSION flag of 'type' and all its
59116097
subclasses. This could possibly be unified with the
59126098
update_subclasses() recursion in update_slot(), but carefully:
@@ -6164,6 +6350,7 @@ type_dealloc(PyObject *self)
61646350
}
61656351
Py_XDECREF(et->ht_module);
61666352
PyMem_Free(et->_ht_tpname);
6353+
clear_spec_cache(type);
61676354
#ifdef Py_GIL_DISABLED
61686355
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
61696356
#endif

0 commit comments

Comments
 (0)