Skip to content

Commit 87a5d30

Browse files
committed
Test PyUnstable_Unicode_GET_CACHED_HASH
1 parent 86d41ea commit 87a5d30

File tree

4 files changed

+42
-1
lines changed

4 files changed

+42
-1
lines changed

Doc/c-api/unicode.rst

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1867,7 +1867,7 @@ The following API is deprecated.
18671867
* :c:func:`PyUnicode_GET_LENGTH`
18681868
* :c:func:`PyObject_Length`
18691869
* * ``hash``
1870-
* N/A
1870+
* :c:func:`PyUnstable_Unicode_GET_CACHED_HASH`
18711871
* :c:func:`PyObject_Hash`
18721872
* * ``interned``
18731873
* :c:func:`PyUnicode_CHECK_INTERNED`
@@ -1971,3 +1971,17 @@ Unstable API
19711971
of Python (such as 3.16.0).
19721972
Users are encouraged to test with CPython pre-releases and use CPython
19731973
development channels to discuss replacement APIs.
1974+
1975+
1976+
.. c:function:: Py_hash_t PyUnstable_Unicode_GET_CACHED_HASH(PyObject *str)
1977+
1978+
Return the hash of *str*, as with :c:func:`PyObject_Hash`, if the hash
1979+
has been cached and is immediately available.
1980+
Otherwise, return ``-1`` *without* setting an exception.
1981+
1982+
The *str* argument must be a string (:c:expr:`PyUnicode_Check(str)`
1983+
must be true). This is not checked.
1984+
1985+
This function never fails with an exception.
1986+
1987+
Note that there are no guarantees on when a Unicode object's hash is cached.

Include/cpython/unicodeobject.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,12 @@ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
312312
}
313313
#define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
314314

315+
/* Returns the cached hash, or -1 if not cached yet. */
316+
static inline Py_hash_t
317+
PyUnstable_Unicode_GET_CACHED_HASH(PyObject *op) {
318+
return _PyASCIIObject_CAST(op)->hash;
319+
}
320+
315321
/* Write into the canonical representation, this function does not do any sanity
316322
checks and is intended for usage in loops. The caller should cache the
317323
kind and data pointers obtained from other function calls.

Lib/test/test_capi/test_unicode.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,6 +1739,20 @@ def test_pep393_utf8_caching_bug(self):
17391739
# Check that the second call returns the same result
17401740
self.assertEqual(getargs_s_hash(s), chr(k).encode() * (i + 1))
17411741

1742+
@support.cpython_only
1743+
@unittest.skipIf(_testcapi is None, 'need _testcapi module')
1744+
def test_GET_CACHED_HASH(self):
1745+
from _testcapi import unicode_GET_CACHED_HASH
1746+
content_bytes = b'some new string'
1747+
# avoid parser interning & constant folding
1748+
obj = str(content_bytes, 'ascii')
1749+
# impl detail: fresh strings do not have cached hash
1750+
self.assertEqual(unicode_GET_CACHED_HASH(obj), -1)
1751+
# impl detail: adding string to a dict caches its hash
1752+
{obj: obj}
1753+
# impl detail: ASCII string hashes are equal to bytes ones
1754+
self.assertEqual(unicode_GET_CACHED_HASH(obj), hash(content_bytes))
1755+
17421756

17431757
class PyUnicodeWriterTest(unittest.TestCase):
17441758
def create_writer(self, size):

Modules/_testcapi/unicode.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,12 @@ unicode_copycharacters(PyObject *self, PyObject *args)
220220
return Py_BuildValue("(Nn)", to_copy, copied);
221221
}
222222

223+
/* Test wrapper: expose PyUnstable_Unicode_GET_CACHED_HASH() to Python.
   Py_hash_t has the width of Py_ssize_t, so convert with
   PyLong_FromSsize_t(); a cast to long would truncate the hash on
   platforms where long is 32-bit (e.g. 64-bit Windows). */
static PyObject*
224+
unicode_GET_CACHED_HASH(PyObject *self, PyObject *arg)
225+
{
226+
return PyLong_FromSsize_t(PyUnstable_Unicode_GET_CACHED_HASH(arg));
227+
}
228+
223229

224230
// --- PyUnicodeWriter type -------------------------------------------------
225231

@@ -548,6 +554,7 @@ static PyMethodDef TestMethods[] = {
548554
{"unicode_asucs4copy", unicode_asucs4copy, METH_VARARGS},
549555
{"unicode_asutf8", unicode_asutf8, METH_VARARGS},
550556
{"unicode_copycharacters", unicode_copycharacters, METH_VARARGS},
557+
{"unicode_GET_CACHED_HASH", unicode_GET_CACHED_HASH, METH_O},
551558
{NULL},
552559
};
553560

0 commit comments

Comments
 (0)