Skip to content

Commit f6f1dd5

Browse files
committed
Add type-local type cache
1 parent 303043f commit f6f1dd5

File tree

4 files changed

+201
-14
lines changed

4 files changed

+201
-14
lines changed

Include/cpython/object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ struct _specialization_cache {
259259
PyObject *getitem;
260260
uint32_t getitem_version;
261261
PyObject *init;
262+
#ifdef Py_GIL_DISABLED
263+
struct local_type_cache *local_type_cache;
264+
#endif
262265
};
263266

264267
/* The *real* layout of a type object when allocated on the heap */

Include/internal/pycore_typeobject.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,28 @@ struct type_cache {
7171
struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP];
7272
};
7373

74+
#ifdef Py_GIL_DISABLED
75+
76+
// Type attribute lookup cache which is type-specific. Only used
77+
// for heap types where we store a small additional cache in free-threaded
78+
// builds which can be accessed without any locking.
79+
#define LOCAL_TYPE_CACHE_SIZE 32
80+
#define LOCAL_TYPE_CACHE_MAX_ENTRIES 24
81+
#define LOCAL_TYPE_CACHE_PROBE 3
82+
83+
// One slot of a per-type attribute cache.  A slot counts as empty while
// `name` is NULL; lookups compare `name` by pointer identity only.
// Writers store `value` first and then publish `name` with a release store,
// so a reader that observes `name` also observes the matching `value`.
struct local_type_cache_entry {
84+
// Strong reference taken with Py_NewRef() when the slot is filled.
PyObject *name; // reference to exactly a str or NULL
85+
// NOTE(review): the insert path stores the looked-up object here without
// an incref (it only marks it "maybe weakref'd") -- confirm that "owned"
// is the intended description.
PyObject *value; // owned reference or NULL
86+
};
87+
88+
// Small open-addressed table of attribute lookups for a single type.
// Heap types allocate one lazily; static builtin types embed one in their
// per-interpreter managed_static_type_state.
struct local_type_cache {
89+
// Version tag reported to callers on a cache hit.
unsigned int tp_version_tag;
90+
// Number of filled slots; inserts are refused once this reaches
// LOCAL_TYPE_CACHE_MAX_ENTRIES.
unsigned int cache_count;
91+
// Probed starting at HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE, stepping by
// LOCAL_TYPE_CACHE_PROBE slots.
struct local_type_cache_entry entries[LOCAL_TYPE_CACHE_SIZE];
92+
};
93+
94+
#endif
95+
7496
typedef struct {
7597
PyTypeObject *type;
7698
int isbuiltin;
@@ -85,6 +107,9 @@ typedef struct {
85107
are also some diagnostic uses for the list of weakrefs,
86108
so we still keep it. */
87109
PyObject *tp_weaklist;
110+
#ifdef Py_GIL_DISABLED
111+
struct local_type_cache local_cache;
112+
#endif
88113
} managed_static_type_state;
89114

90115
#define TYPE_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */

Lib/test/test_sys.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,7 @@ def delx(self): del self.__x
17311731
s = vsize(fmt)
17321732
check(int, s)
17331733
typeid = 'n' if support.Py_GIL_DISABLED else ''
1734+
local_type_cache = 'P' if support.Py_GIL_DISABLED else ''
17341735
# class
17351736
s = vsize(fmt + # PyTypeObject
17361737
'4P' # PyAsyncMethods
@@ -1741,6 +1742,7 @@ def delx(self): del self.__x
17411742
'7P'
17421743
'1PIP' # Specializer cache
17431744
+ typeid # heap type id (free-threaded only)
1745+
+ local_type_cache # local type cache (free-threaded only)
17441746
)
17451747
class newstyleclass(object): pass
17461748
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Objects/typeobject.c

Lines changed: 171 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,9 @@ managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self,
255255
assert(state->type == NULL);
256256
state->type = self;
257257
state->isbuiltin = isbuiltin;
258+
#ifdef Py_GIL_DISABLED
259+
state->local_cache.tp_version_tag = self->tp_version_tag;
260+
#endif
258261

259262
/* state->tp_subclasses is left NULL until init_subclasses() sets it. */
260263
/* state->tp_weaklist is left NULL until insert_head() or insert_after()
@@ -290,6 +293,12 @@ managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self,
290293
assert(state->type != NULL);
291294
state->type = NULL;
292295
assert(state->tp_weaklist == NULL); // It was already cleared out.
296+
#ifdef Py_GIL_DISABLED
297+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
298+
Py_CLEAR(state->local_cache.entries[i].name);
299+
state->local_cache.entries[i].value = NULL;
300+
}
301+
#endif
293302

294303
(void)_Py_atomic_add_int64(
295304
&_PyRuntime.types.managed_static.types[full_index].interp_count, -1);
@@ -1021,6 +1030,31 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version)
10211030
#endif
10221031
}
10231032

1033+
static void
1034+
clear_spec_cache(PyTypeObject *type)
1035+
{
1036+
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1037+
// This field *must* be invalidated if the type is modified (see the
1038+
// comment on struct _specialization_cache):
1039+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
1040+
FT_ATOMIC_STORE_PTR_RELAXED(
1041+
heap_type->_spec_cache.getitem, NULL);
1042+
#ifdef Py_GIL_DISABLED
1043+
struct local_type_cache *cache = heap_type->_spec_cache.local_type_cache;
1044+
if (cache != NULL) {
1045+
for (Py_ssize_t i = 0; i<LOCAL_TYPE_CACHE_SIZE; i++) {
1046+
if (cache->entries[i].name != NULL) {
1047+
Py_CLEAR(cache->entries[i].name);
1048+
}
1049+
}
1050+
_PyMem_FreeDelayed(cache);
1051+
FT_ATOMIC_STORE_PTR_RELAXED(
1052+
heap_type->_spec_cache.local_type_cache, NULL);
1053+
}
1054+
#endif
1055+
}
1056+
}
1057+
10241058
static void
10251059
type_modified_unlocked(PyTypeObject *type)
10261060
{
@@ -1083,12 +1117,7 @@ type_modified_unlocked(PyTypeObject *type)
10831117
}
10841118

10851119
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
1086-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1087-
// This field *must* be invalidated if the type is modified (see the
1088-
// comment on struct _specialization_cache):
1089-
FT_ATOMIC_STORE_PTR_RELAXED(
1090-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1091-
}
1120+
clear_spec_cache(type);
10921121
}
10931122

10941123
void
@@ -1165,12 +1194,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) {
11651194
assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN));
11661195
set_version_unlocked(type, 0); /* 0 is not a valid version tag */
11671196
type->tp_versions_used = _Py_ATTR_CACHE_UNUSED;
1168-
if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
1169-
// This field *must* be invalidated if the type is modified (see the
1170-
// comment on struct _specialization_cache):
1171-
FT_ATOMIC_STORE_PTR_RELAXED(
1172-
((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL);
1173-
}
1197+
clear_spec_cache(type);
11741198
}
11751199

11761200
/*
@@ -5542,6 +5566,119 @@ _PyTypes_AfterFork(void)
55425566
#endif
55435567
}
55445568

5569+
#ifdef Py_GIL_DISABLED
5570+
5571+
static bool
5572+
can_cache_locally(PyTypeObject *type, PyObject *name) {
5573+
// We don't cache types in l1 for anything which is a custom get attr, it's likely
5574+
// to have many dynamic attributes (think modules and metaclasses).
5575+
// We also only cache interned or immortal strings.
5576+
return type->tp_getattro == PyObject_GenericGetAttr &&
5577+
(PyUnicode_CHECK_INTERNED(name) != SSTATE_NOT_INTERNED || _Py_IsImmortal(name));
5578+
}
5579+
5580+
static struct local_type_cache *
5581+
get_local_type_cache(PyTypeObject *type, unsigned int assigned_version)
5582+
{
5583+
unsigned long flags = FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags);
5584+
5585+
if (flags & Py_TPFLAGS_HEAPTYPE) {
5586+
PyHeapTypeObject *heap_type = (PyHeapTypeObject *)type;
5587+
struct local_type_cache *local_cache = heap_type->_spec_cache.local_type_cache;
5588+
if (local_cache == NULL && assigned_version) {
5589+
local_cache = PyMem_Calloc(1, sizeof(struct local_type_cache));
5590+
local_cache->tp_version_tag = assigned_version;
5591+
heap_type->_spec_cache.local_type_cache = local_cache;
5592+
}
5593+
return local_cache;
5594+
} else if (flags & _Py_TPFLAGS_STATIC_BUILTIN) {
5595+
PyInterpreterState *interp = _PyInterpreterState_GET();
5596+
managed_static_type_state *state = managed_static_type_state_get(interp, type);
5597+
return &state->local_cache;
5598+
}
5599+
return NULL;
5600+
}
5601+
5602+
// Hash a name by its address.  The cast goes through uintptr_t so that the
// shift and any following `%` are done on an unsigned value: with a signed
// type, an address with the top bit set would yield a negative table index.
#define HASH_NAME(name) (((uintptr_t)(name)) >> LOCAL_TYPE_CACHE_PROBE)
5603+
5604+
static bool
5605+
try_local_cache_lookup(PyTypeObject *type, PyObject *name, PyObject **value, unsigned int *version)
5606+
{
5607+
if (!can_cache_locally(type, name)) {
5608+
return false;
5609+
}
5610+
5611+
struct local_type_cache *local_cache = get_local_type_cache(type, 0);
5612+
if (local_cache == NULL) {
5613+
return false;
5614+
}
5615+
5616+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5617+
Py_ssize_t cur = index;
5618+
do {
5619+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5620+
PyObject *entry_name = _Py_atomic_load_ptr_acquire(&entry->name);
5621+
if (entry_name == name) {
5622+
// Value is set as maybe weakref'd, and the per-type cache never replaces
5623+
// values so we get away w/ a simple incref here.
5624+
PyObject *entry_value = _Py_atomic_load_ptr_relaxed(&entry->value);
5625+
Py_XINCREF(entry_value);
5626+
*value = entry_value;
5627+
5628+
if (version) {
5629+
*version = local_cache->tp_version_tag;
5630+
}
5631+
5632+
return true;
5633+
}
5634+
else if (entry_name == NULL) {
5635+
break;
5636+
}
5637+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5638+
} while(cur != index);
5639+
return false;
5640+
}
5641+
5642+
static bool
5643+
cache_local_type_lookup(PyTypeObject *type, PyObject *name,
5644+
PyObject *res, unsigned int assigned_version)
5645+
{
5646+
if (!can_cache_locally(type, name) ||
5647+
type->tp_versions_used >= MAX_VERSIONS_PER_CLASS) {
5648+
return false;
5649+
}
5650+
5651+
struct local_type_cache *local_cache = get_local_type_cache(type, assigned_version);
5652+
if (local_cache == NULL ||
5653+
local_cache->cache_count >= LOCAL_TYPE_CACHE_MAX_ENTRIES) {
5654+
return false;
5655+
}
5656+
5657+
Py_ssize_t index = HASH_NAME(name) % LOCAL_TYPE_CACHE_SIZE;
5658+
Py_ssize_t cur = index;
5659+
do {
5660+
struct local_type_cache_entry *entry = &local_cache->entries[cur];
5661+
PyObject *entry_name = _Py_atomic_load_ptr_relaxed(&entry->name);
5662+
if (entry_name == NULL) {
5663+
if (res != NULL) {
5664+
// Reads from other threads can proceed lock-free.
5665+
_PyObject_SetMaybeWeakref(res);
5666+
}
5667+
5668+
// Value is written first, then name, so when name is read the
5669+
// value is always present.
5670+
_Py_atomic_store_ptr_relaxed(&entry->value, res);
5671+
_Py_atomic_store_ptr_release(&entry->name, Py_NewRef(name));
5672+
local_cache->cache_count++;
5673+
return true;
5674+
}
5675+
cur = (cur + LOCAL_TYPE_CACHE_PROBE) % LOCAL_TYPE_CACHE_SIZE;
5676+
} while (cur != index);
5677+
return false;
5678+
}
5679+
5680+
#endif
5681+
55455682
/* Internal API to look for a name through the MRO.
55465683
This returns a strong reference, and doesn't set an exception!
55475684
If nonzero, version is set to the value of type->tp_version at the time of
@@ -5551,13 +5688,22 @@ PyObject *
55515688
_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version)
55525689
{
55535690
PyObject *res;
5691+
5692+
#ifdef Py_GIL_DISABLED
5693+
// Free-threaded, try a completely lock-free per-type L1 cache first
5694+
if (try_local_cache_lookup(type, name, &res, version)) {
5695+
return res;
5696+
}
5697+
#endif
5698+
55545699
int error;
55555700
PyInterpreterState *interp = _PyInterpreterState_GET();
5556-
55575701
unsigned int h = MCACHE_HASH_METHOD(type, name);
55585702
struct type_cache *cache = get_type_cache();
55595703
struct type_cache_entry *entry = &cache->hashtable[h];
5704+
55605705
#ifdef Py_GIL_DISABLED
5706+
// Fall back to global L2 cache which requires sequence locks
55615707
// synchronize-with other writing threads by doing an acquire load on the sequence
55625708
while (1) {
55635709
uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence);
@@ -5574,6 +5720,7 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
55745720
if (version != NULL) {
55755721
*version = entry_version;
55765722
}
5723+
55775724
return value;
55785725
}
55795726
Py_XDECREF(value);
@@ -5612,12 +5759,20 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56125759

56135760
int has_version = 0;
56145761
unsigned int assigned_version = 0;
5762+
5763+
bool locally_cached = false;
56155764
BEGIN_TYPE_LOCK();
5765+
56165766
res = find_name_in_mro(type, name, &error);
56175767
if (MCACHE_CACHEABLE_NAME(name)) {
56185768
has_version = assign_version_tag(interp, type);
56195769
assigned_version = type->tp_version_tag;
56205770
}
5771+
5772+
#ifdef Py_GIL_DISABLED
5773+
locally_cached = has_version && !error &&
5774+
cache_local_type_lookup(type, name, res, assigned_version);
5775+
#endif
56215776
END_TYPE_LOCK();
56225777

56235778
/* Only put NULL results into cache if there was no error. */
@@ -5640,9 +5795,10 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve
56405795
return NULL;
56415796
}
56425797

5643-
if (has_version) {
5798+
if (has_version && !locally_cached) {
56445799
#if Py_GIL_DISABLED
56455800
update_cache_gil_disabled(entry, name, assigned_version, res);
5801+
56465802
#else
56475803
PyObject *old_value = update_cache(entry, name, assigned_version, res);
56485804
Py_DECREF(old_value);
@@ -6164,6 +6320,7 @@ type_dealloc(PyObject *self)
61646320
}
61656321
Py_XDECREF(et->ht_module);
61666322
PyMem_Free(et->_ht_tpname);
6323+
clear_spec_cache(type);
61676324
#ifdef Py_GIL_DISABLED
61686325
assert(et->unique_id == _Py_INVALID_UNIQUE_ID);
61696326
#endif

0 commit comments

Comments
 (0)