Skip to content

Commit 58cf4b8

Browse files
authored
Merge pull request #353 from bluedynamics/optimize-c-lookups
Optimize C extension hot paths
2 parents 97f29ea + 394e9c4 commit 58cf4b8

File tree

1 file changed

+71
-33
lines changed

1 file changed

+71
-33
lines changed

src/zope/interface/_zope_interface_coptimizations.c

Lines changed: 71 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ static PyObject *str_uncached_lookupAll = NULL;
128128
static PyObject *str_uncached_subscriptions = NULL;
129129
static PyObject *strchanged = NULL;
130130
static PyObject *str__adapt__ = NULL;
131+
static PyObject *str_CALL_CUSTOM_ADAPT = NULL;
131132

132133
/* Static strings, used to invoke PyObject_GetItem
133134
*
@@ -166,6 +167,7 @@ define_static_strings()
166167
DEFINE_STATIC_STRING(_uncached_subscriptions);
167168
DEFINE_STATIC_STRING(changed);
168169
DEFINE_STATIC_STRING(__adapt__);
170+
DEFINE_STATIC_STRING(_CALL_CUSTOM_ADAPT);
169171
#undef DEFINE_STATIC_STRING
170172

171173
return 0;
@@ -898,10 +900,10 @@ IB__call__(PyObject* self, PyObject* args, PyObject* kwargs)
898900
will *never* be InterfaceBase, we're always subclassed by
899901
InterfaceClass). Instead, we cooperate with InterfaceClass in Python to
900902
set a flag in a new subclass when this is necessary. */
901-
/* Use Py_TYPE() macro instead of direct ob_type struct access.
902-
* Direct access is incompatible with free-threaded Python (PEP 703)
903-
* which uses atomic operations for type lookups. */
904-
if (PyDict_GetItemString(Py_TYPE(self)->tp_dict, "_CALL_CUSTOM_ADAPT")) {
903+
/* Use pre-interned string + Py_TYPE() instead of PyDict_GetItemString
904+
* with a C literal (which creates a temporary Python string each call)
905+
* and direct ob_type access (incompatible with free-threaded Python). */
906+
if (PyDict_GetItem(Py_TYPE(self)->tp_dict, str_CALL_CUSTOM_ADAPT)) {
905907
/* Doesn't matter what the value is. Simply being present is enough. */
906908
adapter = PyObject_CallMethodObjArgs(self, str__adapt__, obj, NULL);
907909
} else {
@@ -1240,9 +1242,12 @@ _getcache(LB* self, PyObject* provided, PyObject* name)
12401242
if (cache == NULL)
12411243
return NULL;
12421244

1243-
if (name != NULL && PyObject_IsTrue(name)) {
1245+
/* Use PyUnicode_GET_LENGTH for a direct struct field access instead
1246+
* of PyObject_IsTrue which dispatches through the generic truth
1247+
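* protocol (type slot lookup -> nb_bool, else mp_length / sq_length). This shortcut is only valid when name is a str, because PyUnicode_GET_LENGTH performs no type check; confirm that callers guarantee it. */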
1248+
if (name != NULL && PyUnicode_GET_LENGTH(name) > 0) {
12441249
PyObject* subcache = _subcache(cache, name); /* strong ref */
1245-
Py_DECREF(cache);
1250+
Py_DECREF(cache); /* release provided-level cache ref */
12461251
cache = subcache;
12471252
}
12481253

@@ -1287,13 +1292,24 @@ _lookup(LB* self,
12871292
/* If `required` is a lazy sequence, it could have arbitrary side-effects,
12881293
such as clearing our caches. So we must not retrieve the cache until
12891294
after resolving it. */
1290-
required = PySequence_Tuple(required);
1291-
if (required == NULL)
1292-
return NULL;
1295+
/* Fast path: skip PySequence_Tuple allocation when required is
1296+
* already a tuple (the common case from Python callers).
1297+
* Py_INCREF so we own a reference in both branches — the else
1298+
* branch gets a new reference from PySequence_Tuple, so this
1299+
* branch must match, allowing a single Py_DECREF below. */
1300+
if (PyTuple_CheckExact(required)) {
1301+
Py_INCREF(required);
1302+
} else {
1303+
required = PySequence_Tuple(required);
1304+
if (required == NULL)
1305+
return NULL;
1306+
}
12931307

12941308
cache = _getcache(self, provided, name); /* strong ref */
1295-
if (cache == NULL)
1309+
if (cache == NULL) {
1310+
Py_DECREF(required);
12961311
return NULL;
1312+
}
12971313

12981314
if (PyTuple_GET_SIZE(required) == 1)
12991315
key = PyTuple_GET_ITEM(required, 0);
@@ -1570,15 +1586,21 @@ _lookupAll(LB* self, PyObject* required, PyObject* provided)
15701586
PyObject *cache, *result;
15711587

15721588
/* resolve before getting cache. See note in _lookup. */
1573-
required = PySequence_Tuple(required);
1574-
if (required == NULL)
1575-
return NULL;
1589+
if (PyTuple_CheckExact(required)) {
1590+
Py_INCREF(required);
1591+
} else {
1592+
required = PySequence_Tuple(required);
1593+
if (required == NULL)
1594+
return NULL;
1595+
}
15761596

15771597
ASSURE_DICT(self->_mcache);
15781598

15791599
cache = _subcache(self->_mcache, provided); /* strong ref */
1580-
if (cache == NULL)
1600+
if (cache == NULL) {
1601+
Py_DECREF(required);
15811602
return NULL;
1603+
}
15821604

15831605
/* Use PyDict_GetItemRef() for a strong reference. See _lookup(). */
15841606
{
@@ -1649,15 +1671,21 @@ _subscriptions(LB* self, PyObject* required, PyObject* provided)
16491671
PyObject *cache, *result;
16501672

16511673
/* resolve before getting cache. See note in _lookup. */
1652-
required = PySequence_Tuple(required);
1653-
if (required == NULL)
1654-
return NULL;
1674+
if (PyTuple_CheckExact(required)) {
1675+
Py_INCREF(required);
1676+
} else {
1677+
required = PySequence_Tuple(required);
1678+
if (required == NULL)
1679+
return NULL;
1680+
}
16551681

16561682
ASSURE_DICT(self->_scache);
16571683

16581684
cache = _subcache(self->_scache, provided); /* strong ref */
1659-
if (cache == NULL)
1685+
if (cache == NULL) {
1686+
Py_DECREF(required);
16601687
return NULL;
1688+
}
16611689

16621690
/* Use PyDict_GetItemRef() for a strong reference. See _lookup(). */
16631691
{
@@ -1890,23 +1918,33 @@ _verify(VB* self)
18901918
PyObject* changed_result;
18911919

18921920
if (self->_verify_ro != NULL && self->_verify_generations != NULL) {
1893-
PyObject* generations;
1894-
int changed;
1895-
1896-
generations = _generations_tuple(self->_verify_ro);
1897-
if (generations == NULL)
1898-
return -1;
1899-
1900-
changed = PyObject_RichCompareBool(
1901-
self->_verify_generations, generations, Py_NE);
1902-
Py_DECREF(generations);
1903-
if (changed == -1)
1904-
return -1;
1905-
1906-
if (changed == 0)
1907-
return 0;
1921+
int i, l;
1922+
l = PyTuple_GET_SIZE(self->_verify_ro);
1923+
1924+
/* Compare each registry's current _generation counter against the
1925+
* snapshot stored in _verify_generations, without allocating a
1926+
* temporary tuple. The old code built a full tuple via
1927+
* _generations_tuple() on every call and then compared it with
1928+
* RichCompareBool. This version compares in-place and exits
1929+
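* early on the first mismatch. The loop is bounded by the size of _verify_ro and reads _verify_generations with the unchecked PyTuple_GET_ITEM, so _verify_generations must hold at least as many items as _verify_ro. */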
1930+
for (i = 0; i < l; i++) {
1931+
PyObject *reg = PyTuple_GET_ITEM(self->_verify_ro, i);
1932+
PyObject *current_gen = PyObject_GetAttr(reg, str_generation);
1933+
if (current_gen == NULL)
1934+
return -1;
1935+
1936+
PyObject *stored_gen = PyTuple_GET_ITEM(
1937+
self->_verify_generations, i);
1938+
int eq = PyObject_RichCompareBool(current_gen, stored_gen, Py_EQ);
1939+
Py_DECREF(current_gen);
1940+
1941+
if (eq < 0) return -1; /* error */
1942+
if (eq == 0) goto changed; /* mismatch — early exit */
1943+
}
1944+
return 0; /* all match, cache is still valid */
19081945
}
19091946

1947+
changed:
19101948
changed_result =
19111949
PyObject_CallMethodObjArgs(OBJECT(self), strchanged, Py_None, NULL);
19121950
if (changed_result == NULL)

0 commit comments

Comments
 (0)