Skip to content

Commit 51fdcf7

Browse files
committed
Add type-local type cache
1 parent 303043f commit 51fdcf7

File tree

4 files changed

+207
-14
lines changed

4 files changed

+207
-14
lines changed

Include/cpython/object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ struct _specialization_cache {
259259
PyObject *getitem;
260260
uint32_t getitem_version;
261261
PyObject *init;
262+
#ifdef Py_GIL_DISABLED
263+
struct local_type_cache *local_type_cache;
264+
#endif
262265
};
263266

264267
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_typeobject.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ struct type_cache {
7171
struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
7272
};
7373

74+
#ifdef Py_GIL_DISABLED
75+
76+
// Per-type attribute lookup cache, compiled only in free-threaded builds
77+
// (Py_GIL_DISABLED). Heap types reach it through a pointer stored in
78+
// struct _specialization_cache; static builtin types embed one directly in
// managed_static_type_state. Lookups read it without taking any lock.
79+
// Number of slots in local_type_cache.entries; also the modulus applied when
// hashing a name and when stepping to the next probed slot.
#define LOCAL_TYPE_CACHE_SIZE 32
80+
// Inserts are refused once cache_count has reached this many entries, so the
// table is never filled completely.
#define LOCAL_TYPE_CACHE_MAX_ENTRIES 24
81+
// Stride between probed slots, and also the right-shift HASH_NAME applies to
// the name pointer. 3 is coprime to 32, so stepping by it visits every slot
// before returning to the start.
#define LOCAL_TYPE_CACHE_PROBE 3
82+
83+
// One slot of a local_type_cache. A slot is occupied when name is non-NULL.
struct local_type_cache_entry {
84+
// Key: an interned or immortal str, or NULL for an empty slot. The cache
// holds a strong reference to it (stored with Py_NewRef on insert, released
// with Py_CLEAR when a static type's state is cleared).
PyObject *name; // reference to exactly a str or NULL
85+
// Cached lookup result, or NULL when the lookup found nothing. It is stored
// without an incref of its own; the inserter marks it with
// _PyObject_SetMaybeWeakref instead.
// NOTE(review): the original trailing comment calls this "owned" -- confirm
// the intended ownership against the insert path.
PyObject *value; // owned reference or NULL
86+
};
87+
88+
// Fixed-size hash table keyed by the identity (pointer) of the attribute
// name. Collisions are resolved by probing other slots of the same table.
struct local_type_cache {
89+
// Type version tag recorded when the cache was set up; handed back to the
// caller on a cache hit.
unsigned int tp_version_tag;
90+
// Number of occupied slots; incremented on every insert and compared against
// LOCAL_TYPE_CACHE_MAX_ENTRIES before inserting.
unsigned int cache_count;
91+
// The slots themselves.
struct local_type_cache_entry entries[LOCAL_TYPE_CACHE_SIZE];
92+
};
93+
94+
#endif
95+
7496
typedef struct {
7597
PyTypeObject *type;
7698
int isbuiltin;
@@ -85,6 +107,9 @@ typedef struct {
85107
are also some diagnostic uses for the list of weakrefs,
86108
so we still keep it. */
87109
PyObject *tp_weaklist;
110+
#ifdef Py_GIL_DISABLED
111+
struct local_type_cache local_cache;
112+
#endif
88113
} managed_static_type_state;
89114

90115
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */

Lib/test/test_sys.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def delx(self): del self.__x
17311731
s = vsize(fmt)
17321732
check(int, s)
17331733
typeid = 'n' if support.Py_GIL_DISABLED else ''
1734+
local_type_cache = 'P' if support.Py_GIL_DISABLED else ''
17341735
# class
17351736
s = vsize(fmt + # PyTypeObject
17361737
'4P' # PyAsyncMethods
@@ -1741,6 +1742,7 @@ def delx(self): del self.__x
17411742
'7P'
17421743
'1PIP' # Specializer cache
17431744
+ typeid # heap type id (free-threaded only)
1745+
+ local_type_cache # local type cache (free-threaded only)
17441746
)
17451747
class newstyleclass(object): pass
17461748
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Objects/typeobject.c

Lines changed: 177 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255
assert(state->type == NULL);
256256
state->type = self;
257257
state->isbuiltin = isbuiltin;
258+
#ifdef Py_GIL_DISABLED
259+
state->local_cache.tp_version_tag = self->tp_version_tag;
260+
#endif
258261

259262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293
assert(state->type != NULL);
291294
state->type = NULL;
292295
assert(state->tp_weaklist == NULL); // It was already cleared out.
296+
#ifdef Py_GIL_DISABLED
297+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
298+
Py_CLEAR(state->local_cache.entries[i].name);
299+
state->local_cache.entries[i].value = NULL;
300+
}
301+
#endif
293302

294303
(void)_Py_atomic_add_int64(
295304
&_PyRuntime.types.managed_static.types[full_index].interp_count, -1);
@@ -1021,6 +1030,37 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030
#endif
10221031
}
10231032

1033+
static void
1034+
clear_spec_cache(PyTypeObject *type)
1035+
{
1036+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1037+
// This field *must* be invalidated if the type is modified (see the
1038+
// comment on struct _specialization_cache):
1039+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
1040+
FT_ATOMIC_STORE_PTR_RELAXED(
1041+
heap_type->_spec_cache.getitem, NULL);
1042+
#ifdef Py_GIL_DISABLED
1043+
struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
1044+
if (cache != NULL) {
1045+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
1046+
PyObject *name = cache->entries[i].name;
1047+
if (name != NULL) {
1048+
// Readers can be racing with the local type cache when a value is being replaced
1049+
// in the type, and they can try and incref it after it has been decref'd, so
1050+
// we eagerly clear these out.
1051+
_Py_atomic_store_ptr_release(&cache->entries[i].name, Py_NewRef(name));
1052+
_Py_atomic_store_ptr_relaxed(&cache->entries[i].value, NULL);
1053+
Py_DECREF(name);
1054+
}
1055+
}
1056+
_PyMem_FreeDelayed(cache);
1057+
FT_ATOMIC_STORE_PTR_RELAXED(
1058+
heap_type->_spec_cache.local_type_cache, NULL);
1059+
}
1060+
#endif
1061+
}
1062+
}
1063+
10241064
static void
10251065
type_modified_unlocked(PyTypeObject *type)
10261066
{
@@ -1083,12 +1123,7 @@ type_modified_unlocked(PyTypeObject *type)
10831123
}
10841124

10851125
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
1086-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1087-
// This field *must* be invalidated if the type is modified (see the
1088-
// comment on struct _specialization_cache):
1089-
FT_ATOMIC_STORE_PTR_RELAXED(
1090-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1091-
}
1126+
clear_spec_cache(type);
10921127
}
10931128

10941129
void
@@ -1165,12 +1200,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651200
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
11661201
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
11671202
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
1168-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1169-
// This field *must* be invalidated if the type is modified (see the
1170-
// comment on struct _specialization_cache):
1171-
FT_ATOMIC_STORE_PTR_RELAXED(
1172-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1173-
}
1203+
clear_spec_cache(type);
11741204
}
11751205

11761206
/*
@@ -5542,6 +5572,119 @@ _PyTypes_AfterFork(void)
55425572
#endif
55435573
}
55445574

5575+
#ifdef Py_GIL_DISABLED
5576+
5577+
static bool
5578+
can_cache_locally(PyTypeObject *type, PyObject *name) {
5579+
// We don't cache types in l1 for anything which is a custom get attr, it's likely
5580+
// to have many dynamic attributes (think modules and metaclasses).
5581+
// We also only cache interned or immortal strings.
5582+
return type->tp_getattro == PyObject_GenericGetAttr &&
5583+
(PyUnicode_CHECK_INTERNED(name) != SSTATE_NOT_INTERNED || _Py_IsImmortal(name));
5584+
}
5585+
5586+
static struct local_type_cache *
5587+
get_local_type_cache(PyTypeObject *type, unsigned int assigned_version)
5588+
{
5589+
unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
5590+
5591+
if (flags & Py_TPFLAGS_HEAPTYPE) {
5592+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
5593+
struct local_type_cache *local_cache = heap_type->_spec_cache.local_type_cache;
5594+
if (local_cache == NULL && assigned_version) {
5595+
local_cache = PyMem_Calloc(1, sizeof(struct local_type_cache));
5596+
local_cache->tp_version_tag = assigned_version;
5597+
heap_type->_spec_cache.local_type_cache = local_cache;
5598+
}
5599+
return local_cache;
5600+
} else if (flags & _Py_TPFLAGS_STATIC_BUILTIN) {
5601+
PyInterpreterState *interp = _PyInterpreterState_GET();
5602+
managed_static_type_state *state = managed_static_type_state_get(interp, type);
5603+
return &state->local_cache;
5604+
}
5605+
return NULL;
5606+
}
5607+
5608+
// Hash an attribute name by identity: the name's address with its low
// LOCAL_TYPE_CACHE_PROBE bits shifted out. The cast is to an unsigned type so
// that the result, and therefore `HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE`,
// can never be negative, even for addresses with the top bit set.
#define HASH_NAME(name) (((uintptr_t)(name)) >> LOCAL_TYPE_CACHE_PROBE)
5609+
5610+
static bool
5611+
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
5612+
{
5613+
if (!can_cache_locally(type, name)) {
5614+
return false;
5615+
}
5616+
5617+
struct local_type_cache *local_cache = get_local_type_cache(type, 0);
5618+
if (local_cache == NULL) {
5619+
return false;
5620+
}
5621+
5622+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5623+
Py_ssize_t cur = index;
5624+
do {
5625+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5626+
PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
5627+
if (entry_name == name) {
5628+
// Value is set as maybe weakref'd, and the per-type cache never replaces
5629+
// values so we get away w/ a simple incref here.
5630+
PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
5631+
Py_XINCREF(entry_value);
5632+
*value = entry_value;
5633+
5634+
if (version) {
5635+
*version = local_cache->tp_version_tag;
5636+
}
5637+
5638+
return true;
5639+
}
5640+
else if (entry_name == NULL) {
5641+
break;
5642+
}
5643+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5644+
} while(cur != index);
5645+
return false;
5646+
}
5647+
5648+
static bool
5649+
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
5650+
PyObject *res, unsigned int assigned_version)
5651+
{
5652+
if (!can_cache_locally(type, name) ||
5653+
type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
5654+
return false;
5655+
}
5656+
5657+
struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
5658+
if (local_cache == NULL ||
5659+
local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
5660+
return false;
5661+
}
5662+
5663+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5664+
Py_ssize_t cur = index;
5665+
do {
5666+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5667+
PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
5668+
if (entry_name == NULL) {
5669+
if (res != NULL) {
5670+
// Reads from other threads can proceed lock-free.
5671+
_PyObject_SetMaybeWeakref(res);
5672+
}
5673+
5674+
// Value is written first, then name, so when name is read the
5675+
// value is always present.
5676+
_Py_atomic_store_ptr_relaxed(&entry->value, res);
5677+
_Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
5678+
local_cache->cache_count++;
5679+
return true;
5680+
}
5681+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5682+
} while (cur != index);
5683+
return false;
5684+
}
5685+
5686+
#endif
5687+
55455688
/* Internal API to look for a name through the MRO.
55465689
This returns a strong reference, and doesn't set an exception!
55475690
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5694,22 @@ PyObject *
55515694
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55525695
{
55535696
PyObject *res;
5697+
5698+
#ifdef Py_GIL_DISABLED
5699+
// Free-threaded, try a completely lock-free per-type L1 cache first
5700+
if (try_local_cache_lookup(type, name, &res, version)) {
5701+
return res;
5702+
}
5703+
#endif
5704+
55545705
int error;
55555706
PyInterpreterState *interp = _PyInterpreterState_GET();
5556-
55575707
unsigned int h = MCACHE_HASH_METHOD(type, name);
55585708
struct type_cache *cache = get_type_cache();
55595709
struct type_cache_entry *entry = &cache->hashtable[h];
5710+
55605711
#ifdef Py_GIL_DISABLED
5712+
// Fall back to global L2 cache which requires sequence locks
55615713
// synchronize-with other writing threads by doing an acquire load on the sequence
55625714
while (1) {
55635715
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
@@ -5574,6 +5726,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745726
if (version != NULL) {
55755727
*version = entry_version;
55765728
}
5729+
55775730
return value;
55785731
}
55795732
Py_XDECREF(value);
@@ -5612,12 +5765,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125765

56135766
int has_version = 0;
56145767
unsigned int assigned_version = 0;
5768+
5769+
bool locally_cached = false;
56155770
BEGIN_TYPE_LOCK();
5771+
56165772
res = find_name_in_mro(type, name, &error);
56175773
if (MCACHE_CACHEABLE_NAME(name)) {
56185774
has_version = assign_version_tag(interp, type);
56195775
assigned_version = type->tp_version_tag;
56205776
}
5777+
5778+
#ifdef Py_GIL_DISABLED
5779+
locally_cached = has_version && !error &&
5780+
cache_local_type_lookup(type, name, res, assigned_version);
5781+
#endif
56215782
END_TYPE_LOCK();
56225783

56235784
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5801,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405801
return NULL;
56415802
}
56425803

5643-
if (has_version) {
5804+
if (has_version && !locally_cached) {
56445805
#if Py_GIL_DISABLED
56455806
update_cache_gil_disabled(entry, name, assigned_version, res);
5807+
56465808
#else
56475809
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56485810
Py_DECREF(old_value);
@@ -6164,6 +6326,7 @@ type_dealloc(PyObject *self)
61646326
}
61656327
Py_XDECREF(et->ht_module);
61666328
PyMem_Free(et->_ht_tpname);
6329+
clear_spec_cache(type);
61676330
#ifdef Py_GIL_DISABLED
61686331
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
61696332
#endif

0 commit comments

Comments
 (0)