From c897d9299db1b0ad27abae74c8dc63b9a15af1e8 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Wed, 2 Apr 2025 20:01:05 +0530 Subject: [PATCH 1/3] gh-132013: use relaxed atomics in hash of frozenset (#132014) Use relaxed atomics in hash of `frozenset` to fix TSAN warning. --- Objects/setobject.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 81a34ad5ab12c5..6a73c515b0c4ea 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -718,18 +718,19 @@ _shuffle_bits(Py_uhash_t h) large primes with "interesting bit patterns" and that passed tests for good collision statistics on a variety of problematic datasets including powersets and graph structures (such as David Eppstein's - graph recipes in Lib/test/test_set.py) */ + graph recipes in Lib/test/test_set.py). + + This hash algorithm can be used on either a frozenset or a set. + When it is used on a set, it computes the hash value of the equivalent + frozenset without creating a new frozenset object. */ static Py_hash_t -frozenset_hash(PyObject *self) +frozenset_hash_impl(PyObject *self) { - PySetObject *so = (PySetObject *)self; + PySetObject *so = _PySet_CAST(self); Py_uhash_t hash = 0; setentry *entry; - if (so->hash != -1) - return so->hash; - /* Xor-in shuffled bits from every entry's hash field because xor is commutative and a frozenset hash should be independent of order. @@ -762,7 +763,21 @@ frozenset_hash(PyObject *self) if (hash == (Py_uhash_t)-1) hash = 590923713UL; - so->hash = hash; + return (Py_hash_t)hash; +} + +static Py_hash_t +frozenset_hash(PyObject *self) +{ + PySetObject *so = _PySet_CAST(self); + Py_uhash_t hash; + + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(so->hash) != -1) { + return FT_ATOMIC_LOAD_SSIZE_RELAXED(so->hash); + } + + hash = frozenset_hash_impl(self); + FT_ATOMIC_STORE_SSIZE_RELAXED(so->hash, hash); return hash; } From 3a5b4e3494d374fc345d448e1646f86e4cea8bb6 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 7 Apr 2025 21:59:24 +0530 Subject: [PATCH 2/3] gh-132213: use relaxed atomics for set hash (#132215) --- Objects/setobject.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 6a73c515b0c4ea..e5abfbe5765311 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -400,7 +400,7 @@ set_empty_to_minsize(PySetObject *so) FT_ATOMIC_STORE_SSIZE_RELAXED(so->used, 0); so->mask = PySet_MINSIZE - 1; so->table = so->smalltable; - so->hash = -1; + FT_ATOMIC_STORE_SSIZE_RELAXED(so->hash, -1); } static int @@ -1217,10 +1217,12 @@ set_swap_bodies(PySetObject *a, PySetObject *b) if (PyType_IsSubtype(Py_TYPE(a), &PyFrozenSet_Type) && PyType_IsSubtype(Py_TYPE(b), &PyFrozenSet_Type)) { - h = a->hash; a->hash = b->hash; b->hash = h; + h = FT_ATOMIC_LOAD_SSIZE_RELAXED(a->hash); + FT_ATOMIC_STORE_SSIZE_RELAXED(a->hash, FT_ATOMIC_LOAD_SSIZE_RELAXED(b->hash)); + FT_ATOMIC_STORE_SSIZE_RELAXED(b->hash, h); } else { - a->hash = -1; - b->hash = -1; + FT_ATOMIC_STORE_SSIZE_RELAXED(a->hash, -1); + FT_ATOMIC_STORE_SSIZE_RELAXED(b->hash, -1); } } @@ -2101,9 +2103,9 @@ set_richcompare(PySetObject *v, PyObject *w, int op) case Py_EQ: if (PySet_GET_SIZE(v) != PySet_GET_SIZE(w)) Py_RETURN_FALSE; - if (v->hash != -1 && - ((PySetObject *)w)->hash != -1 && - v->hash != ((PySetObject *)w)->hash) + Py_hash_t v_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(v->hash); + Py_hash_t w_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(((PySetObject *)w)->hash); + if (v_hash != -1 && w_hash != -1 && v_hash != w_hash) Py_RETURN_FALSE; return set_issubset(v, w); case Py_NE: From 790081a1dcaee8426b7024fb1ddcd5cd475632fb Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sat, 12 Apr 2025 23:26:31 +0530 Subject: [PATCH 3/3] revert comment --- Objects/setobject.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index e5abfbe5765311..7c5d6eb34cae08 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -718,11 +718,7 @@ _shuffle_bits(Py_uhash_t h) large primes with "interesting bit patterns" and that passed tests for good collision statistics on a variety of problematic datasets including powersets and graph structures (such as David Eppstein's - graph recipes in Lib/test/test_set.py). - - This hash algorithm can be used on either a frozenset or a set. - When it is used on a set, it computes the hash value of the equivalent - frozenset without creating a new frozenset object. */ + graph recipes in Lib/test/test_set.py). */ static Py_hash_t frozenset_hash_impl(PyObject *self)