From 0f31cab07ffdb142d74babd715c0fe220b052e82 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Tue, 17 Sep 2024 22:17:17 +0800
Subject: [PATCH 1/3] gh-124165: Add _Py_bit_length64() for 64-bit integer support

Introduce a new function, _Py_bit_length64(), to compute the bit length
of 64-bit integers efficiently. It uses __builtin_clzll() on GCC/Clang
and _BitScanReverse64() on 64-bit MSVC; for compilers without these
intrinsics, a portable fallback based on a small lookup table is
provided.

This addition is needed to compute the bit length of 64-bit values
safely, in particular for types like Py_ssize_t on platforms where
unsigned long is only 32 bits wide, such as 64-bit Windows, which the
existing _Py_bit_length(unsigned long) cannot cover.
---
 Include/internal/pycore_bitutils.h | 37 ++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h
index 50f69377523818..352112eb98344d 100644
--- a/Include/internal/pycore_bitutils.h
+++ b/Include/internal/pycore_bitutils.h
@@ -180,6 +180,43 @@ _Py_bit_length(unsigned long x)
 }
 
 
+static inline int
+_Py_bit_length64(uint64_t x)
+{
+#if (defined(__clang__) || defined(__GNUC__))
+    if (x != 0) {
+        // __builtin_clzll() is available since GCC 3.4.
+        // Undefined behavior for x == 0.
+        return (int)sizeof(uint64_t) * 8 - __builtin_clzll(x);
+    }
+    else {
+        return 0;
+    }
+#elif defined(_MSC_VER) && defined(_WIN64)
+    // _BitScanReverse64() is documented to search 64 bits.
+    unsigned long msb;
+    if (_BitScanReverse64(&msb, x)) {
+        return (int)msb + 1;
+    }
+    else {
+        return 0;
+    }
+#else
+    const int BIT_LENGTH_TABLE[32] = {
+        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+    };
+    int msb = 0;
+    while (x >= 32) {
+        msb += 6;
+        x >>= 6;
+    }
+    msb += BIT_LENGTH_TABLE[x];
+    return msb;
+#endif
+}
+
+
 #ifdef __cplusplus
 }
 #endif
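As a sanity check on the portable #else branch above, here is a minimal standalone sketch (not part of the patch; the helper names bit_length64_fallback() and bit_length64_reference() are made up for illustration) that compares the lookup-table fallback against a naive shift loop. The table covers indexes 0..31, i.e. bit lengths up to 5, and the loop strips 6 bits at a time only while at least 6 bits remain, so the final table index is always in range:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Same lookup-table scheme as the patch's #else branch: the table covers
   indexes 0..31 (bit lengths 0..5); the loop strips 6 bits at a time while
   at least 6 bits remain, so the final table index is always < 32. */
static int
bit_length64_fallback(uint64_t x)
{
    static const int table[32] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
    };
    int msb = 0;
    while (x >= 32) {
        msb += 6;
        x >>= 6;
    }
    return msb + table[x];
}

/* Reference: shift the value down until it is exhausted. */
static int
bit_length64_reference(uint64_t x)
{
    int len = 0;
    while (x != 0) {
        len++;
        x >>= 1;
    }
    return len;
}

int
main(void)
{
    const uint64_t samples[] = {
        0, 1, 31, 32, 63, 64, 0x1234, 0xFFFFFFFFu,
        0x7FFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull
    };
    for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        assert(bit_length64_fallback(samples[i])
               == bit_length64_reference(samples[i]));
    }
    printf("fallback matches the reference bit-length loop\n");
    return 0;
}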
From 2f3b0ac83865c90c721e9890b403d0291a55e868 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Tue, 17 Sep 2024 23:50:59 +0800
Subject: [PATCH 2/3] gh-124165: Add test for _Py_bit_length64()

Introduce a new test function, test_bit_length64(), which verifies the
correctness of _Py_bit_length64().
---
 Modules/_testinternalcapi.c | 42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 0451688a46c75f..235f4755f66c41 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -227,6 +227,47 @@ test_bit_length(PyObject *self, PyObject *Py_UNUSED(args))
 }
 
 
+static int
+check_bit_length64(uint64_t x, int expected)
+{
+    // Use volatile to prevent the compiler from optimizing out the whole test
+    volatile uint64_t u = x;
+    int len = _Py_bit_length64(u);
+    if (len != expected) {
+        PyErr_Format(PyExc_AssertionError,
+                     "_Py_bit_length64(%llu) returns %i, expected %i",
+                     (unsigned long long)x, len, expected);
+        return -1;
+    }
+    return 0;
+}
+
+
+static PyObject*
+test_bit_length64(PyObject *self, PyObject *Py_UNUSED(args))
+{
+#define CHECK(X, RESULT) \
+    do { \
+        if (check_bit_length64(X, RESULT) < 0) { \
+            return NULL; \
+        } \
+    } while (0)
+
+    CHECK(0, 0);
+    CHECK(1, 1);
+    CHECK(0x1000, 13);
+    CHECK(0x1234, 13);
+    CHECK(0x54321, 19);
+    CHECK(0x7FFFFFFF, 31);
+    CHECK(0xFFFFFFFF, 32);
+    CHECK(0x7FFFFFFFFFFFFFFFULL, 63);
+    CHECK(0xFFFFFFFFFFFFFFFFULL, 64);
+    Py_RETURN_NONE;
+
+#undef CHECK
+}
+
+
 #define TO_PTR(ch) ((void*)(uintptr_t)ch)
 #define FROM_PTR(ptr) ((uintptr_t)ptr)
 #define VALUE(key) (1 + ((int)(key) - 'a'))
@@ -2056,6 +2097,7 @@ static PyMethodDef module_functions[] = {
     {"test_bswap", test_bswap, METH_NOARGS},
     {"test_popcount", test_popcount, METH_NOARGS},
     {"test_bit_length", test_bit_length, METH_NOARGS},
+    {"test_bit_length64", test_bit_length64, METH_NOARGS},
     {"test_hashtable", test_hashtable, METH_NOARGS},
     {"get_config", test_get_config, METH_NOARGS},
     {"set_config", test_set_config, METH_O},

From b62ecdbc1abf46bf57bb43693de795da770a1c65 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Tue, 17 Sep 2024 17:54:56 +0800
Subject: [PATCH 3/3] gh-124165: Optimize keep_top_bit() in heapq module by using _Py_bit_length()

Optimize keep_top_bit() to use _Py_bit_length()/_Py_bit_length64()
instead of shifting the value down one bit at a time. This allows more
efficient execution on hardware that supports specialized instructions
such as bsr on x86.

Additionally, the function is now marked inline: it is small and used
in only one place, which improves the potential for inlining by the
compiler.
---
 Modules/_heapqmodule.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Modules/_heapqmodule.c b/Modules/_heapqmodule.c
index 80fe9cff98509d..3d7434ddaa5626 100644
--- a/Modules/_heapqmodule.c
+++ b/Modules/_heapqmodule.c
@@ -12,6 +12,7 @@ annotated by François Pinard, and converted to C by Raymond Hettinger.
 
 #include "Python.h"
 #include "pycore_list.h"          // _PyList_ITEMS()
+#include "pycore_bitutils.h"      // _Py_bit_length(), _Py_bit_length64()
 
 #include "clinic/_heapqmodule.c.h"
 
@@ -279,16 +280,15 @@ _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item)
     return returnitem;
 }
 
-static Py_ssize_t
+static inline Py_ssize_t
 keep_top_bit(Py_ssize_t n)
 {
-    int i = 0;
-
-    while (n > 1) {
-        n >>= 1;
-        i++;
-    }
-    return n << i;
+    Py_BUILD_ASSERT(sizeof(Py_ssize_t) <= sizeof(uint64_t));
+#if defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T <= 4
+    return (Py_ssize_t)1 << (_Py_bit_length((unsigned long)n) - 1);
+#else
+    return (Py_ssize_t)1 << (_Py_bit_length64((uint64_t)n) - 1);
+#endif
 }
 
 /* Cache friendly version of heapify()
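For reference, the identity behind the new keep_top_bit(): for n >= 1, keeping only the highest set bit of n is the same as 1 << (bit_length(n) - 1). Note that the old loop also returned 0 for n == 0, while the new version would use a negative shift count in that case, so callers must pass n >= 1. The standalone sketch below is not part of the patch; bit_length64(), keep_top_bit_loop() and keep_top_bit_bitlen() are illustrative names, with int64_t standing in for Py_ssize_t. It checks the two formulations against each other:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for _Py_bit_length64(); the CPython helper uses
   compiler intrinsics where they are available. */
static int
bit_length64(uint64_t x)
{
    int len = 0;
    while (x != 0) {
        len++;
        x >>= 1;
    }
    return len;
}

/* Old keep_top_bit(): shift down to 1, counting steps, then shift back up. */
static int64_t
keep_top_bit_loop(int64_t n)
{
    int i = 0;
    while (n > 1) {
        n >>= 1;
        i++;
    }
    return n << i;
}

/* New formulation from the patch: for n >= 1, the highest set bit of n is
   1 << (bit_length(n) - 1).  bit_length(0) == 0 would give a negative
   shift count, so n == 0 is not supported here. */
static int64_t
keep_top_bit_bitlen(int64_t n)
{
    assert(n >= 1);
    return (int64_t)1 << (bit_length64((uint64_t)n) - 1);
}

int
main(void)
{
    for (int64_t n = 1; n <= 100000; n++) {
        assert(keep_top_bit_loop(n) == keep_top_bit_bitlen(n));
    }
    printf("loop and bit-length versions agree for n in [1, 100000]\n");
    return 0;
}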