diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 324305417a600..5f991312b7e10 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -154,7 +154,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.api.types.is_file_like PR07,SA01" \
         -i "pandas.api.types.is_float PR01,SA01" \
         -i "pandas.api.types.is_float_dtype SA01" \
-        -i "pandas.api.types.is_hashable PR01,RT03,SA01" \
+        -i "pandas.api.types.is_hashable PR01,SA01" \
         -i "pandas.api.types.is_int64_dtype SA01" \
         -i "pandas.api.types.is_integer PR01,SA01" \
         -i "pandas.api.types.is_integer_dtype SA01" \
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
index daaaacee3487d..71539101860e6 100644
--- a/pandas/_libs/lib.pyi
+++ b/pandas/_libs/lib.pyi
@@ -48,6 +48,7 @@ def is_bool(obj: object) -> TypeGuard[bool | np.bool_]: ...
 def is_integer(obj: object) -> TypeGuard[int | np.integer]: ...
 def is_int_or_none(obj) -> bool: ...
 def is_float(obj: object) -> TypeGuard[float]: ...
+def is_hashable(obj: object) -> TypeGuard[Hashable]: ...
 def is_interval_array(values: np.ndarray) -> bool: ...
 def is_datetime64_array(values: np.ndarray, skipna: bool = True) -> bool: ...
 def is_timedelta_or_timedelta64_array(
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index e1a2a0142c52e..c2f0c9f86bbfa 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -19,19 +19,30 @@ from cpython.datetime cimport (
     time,
     timedelta,
 )
+from cpython.dict cimport PyDict_CheckExact
+from cpython.float cimport PyFloat_CheckExact
 from cpython.iterator cimport PyIter_Check
+from cpython.list cimport PyList_CheckExact
+from cpython.long cimport PyLong_CheckExact
 from cpython.number cimport PyNumber_Check
 from cpython.object cimport (
     Py_EQ,
     PyObject,
+    PyObject_Hash,
     PyObject_RichCompareBool,
 )
 from cpython.ref cimport Py_INCREF
 from cpython.sequence cimport PySequence_Check
+from cpython.set cimport (
+    PyAnySet_CheckExact,
+    PyFrozenSet_CheckExact,
+)
 from cpython.tuple cimport (
+    PyTuple_CheckExact,
     PyTuple_New,
     PyTuple_SET_ITEM,
 )
+from cpython.unicode cimport PyUnicode_CheckExact
 from cython cimport (
     Py_ssize_t,
     floating,
@@ -1089,6 +1100,78 @@ def is_float(obj: object) -> bool:
     return util.is_float_object(obj)
 
 
+cpdef bint is_hashable(object obj) except -1:
+    """
+    Return True if hash(obj) will succeed, False otherwise.
+
+    Some types will pass a test against collections.abc.Hashable but fail when
+    they are actually hashed with hash().
+
+    Distinguish between these and other types by trying the call to hash() and
+    seeing if they raise TypeError.
+
+    Returns
+    -------
+    bool
+
+    Examples
+    --------
+    >>> import collections
+    >>> from pandas.api.types import is_hashable
+    >>> a = ([],)
+    >>> isinstance(a, collections.abc.Hashable)
+    True
+    >>> is_hashable(a)
+    False
+    """
+    cdef:
+        bint is_none
+        bint is_long
+        bint is_float
+        bint is_unicode
+        bint is_tuple
+        bint is_frozen_set
+        bint is_dict
+        bint is_list
+        bint is_any_set
+
+    # Perform all checks in order to be nice to the branch predictor
+    is_none = obj is None
+    is_long = PyLong_CheckExact(obj)
+    is_float = PyFloat_CheckExact(obj)
+    is_unicode = PyUnicode_CheckExact(obj)
+    is_tuple = PyTuple_CheckExact(obj)
+    is_frozen_set = PyFrozenSet_CheckExact(obj)
+    is_dict = PyDict_CheckExact(obj)
+    is_list = PyList_CheckExact(obj)
+    # PyAnySet_CheckExact is also true for an exact frozenset; that case has
+    # already returned True by the time is_any_set is consulted below.
+    is_any_set = PyAnySet_CheckExact(obj)
+
+    if is_none or is_long or is_float or is_unicode or is_frozen_set:
+        return True
+
+    # tuple is hashable if and only if all elements are hashable
+    if is_tuple:
+        for o in obj:
+            if not is_hashable(o):
+                return False
+        return True
+
+    if is_dict or is_list or is_any_set:
+        return False
+
+    # Only TypeError means "not hashable". Any other exception raised while
+    # hashing must reach the caller, as it did with the pure-Python version;
+    # that is why the signature uses ``except -1`` rather than ``noexcept``.
+    try:
+        PyObject_Hash(obj)
+    except TypeError:
+        return False
+    else:
+        return True
+
+
 def is_integer(obj: object) -> bool:
     """
     Return True if given object is integer.
diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index f042911b53d2b..3006a3303e24d 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -13,8 +13,6 @@
 from pandas._libs import lib
 
 if TYPE_CHECKING:
-    from collections.abc import Hashable
-
     from pandas._typing import TypeGuard
 
 is_bool = lib.is_bool
@@ -23,6 +21,8 @@
 
 is_float = lib.is_float
 
+is_hashable = lib.is_hashable
+
 is_complex = lib.is_complex
 
 is_scalar = lib.is_scalar
@@ -330,45 +330,6 @@ def is_named_tuple(obj: object) -> bool:
     return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields")
 
 
-def is_hashable(obj: object) -> TypeGuard[Hashable]:
-    """
-    Return True if hash(obj) will succeed, False otherwise.
-
-    Some types will pass a test against collections.abc.Hashable but fail when
-    they are actually hashed with hash().
-
-    Distinguish between these and other types by trying the call to hash() and
-    seeing if they raise TypeError.
-
-    Returns
-    -------
-    bool
-
-    Examples
-    --------
-    >>> import collections
-    >>> from pandas.api.types import is_hashable
-    >>> a = ([],)
-    >>> isinstance(a, collections.abc.Hashable)
-    True
-    >>> is_hashable(a)
-    False
-    """
-    # Unfortunately, we can't use isinstance(obj, collections.abc.Hashable),
-    # which can be faster than calling hash. That is because numpy scalars
-    # fail this test.
-
-    # Reconsider this decision once this numpy bug is fixed:
-    # https://github.com/numpy/numpy/issues/5562
-
-    try:
-        hash(obj)
-    except TypeError:
-        return False
-    else:
-        return True
-
-
 def is_sequence(obj: object) -> bool:
     """
     Check if the object is a sequence of objects.