diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index 928e44e51b3cf..a3d8456b516b1 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -376,10 +376,15 @@ def is_named_tuple(obj: object) -> bool:
     return isinstance(obj, abc.Sequence) and hasattr(obj, "_fields")
 
 
-def is_hashable(obj: object) -> TypeGuard[Hashable]:
+def is_hashable(obj: object, allow_slice: bool | None = None) -> TypeGuard[Hashable]:
     """
     Return True if hash(obj) will succeed, False otherwise.
 
+    If `allow_slice` is False, a slice or a tuple that directly contains a slice
+    is reported as not hashable, even if hash(obj) would succeed.
+    If `allow_slice` is True or None (the default), no slice check is applied and
+    the result depends only on whether hash(obj) raises TypeError.
+
     Some types will pass a test against collections.abc.Hashable but fail when
     they are actually hashed with hash().
 
@@ -390,13 +395,17 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]:
     ----------
     obj : object
         The object to check for hashability. Any Python object can be passed here.
+    allow_slice : bool or None, default None
+        If False, a slice or a tuple directly containing a slice is never
+        considered hashable. If True or None, no slice check is applied.
 
     Returns
     -------
     bool
         True if object can be hashed (i.e., does not raise TypeError when
-        passed to hash()), and False otherwise (e.g., if object is mutable
-        like a list or dictionary).
+        passed to hash()) and is not rejected by the `allow_slice` check.
+        False otherwise (e.g., if object is mutable like a list or dictionary,
+        or if allow_slice is False and object is or directly contains a slice).
 
     See Also
     --------
@@ -422,6 +431,13 @@ def is_hashable(obj: object) -> TypeGuard[Hashable]:
     # Reconsider this decision once this numpy bug is fixed:
     # https://github.com/numpy/numpy/issues/5562
 
+    if allow_slice is False:
+        # Only direct tuple elements are inspected; nested tuples are not.
+        if isinstance(obj, slice):
+            return False
+        if isinstance(obj, tuple) and any(isinstance(v, slice) for v in obj):
+            return False
+
     try:
         hash(obj)
     except TypeError:
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index d0955912e12c8..116adcb883326 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -34,6 +34,7 @@
     missing as libmissing,
     ops as libops,
 )
+from pandas.compat import PY312
 from pandas.compat.numpy import np_version_gt2
 from pandas.errors import Pandas4Warning
 
@@ -452,16 +453,56 @@ class UnhashableClass2:
         def __hash__(self):
             raise TypeError("Not hashable")
 
+    class HashableSlice:
+        def __init__(self, start, stop, step=None):
+            self.slice = slice(start, stop, step)
+
+        def __eq__(self, other):
+            return isinstance(other, HashableSlice) and self.slice == other.slice
+
+        def __hash__(self):
+            return hash((self.slice.start, self.slice.stop, self.slice.step))
+
+        def __repr__(self):
+            return (
+                f"HashableSlice({self.slice.start}, {self.slice.stop}, "
+                f"{self.slice.step})"
+            )
+
     hashable = (1, 3.14, np.float64(3.14), "a", (), (1,), HashableClass())
     not_hashable = ([], UnhashableClass1())
     abc_hashable_not_really_hashable = (([],), UnhashableClass2())
+    hashable_slice = HashableSlice(1, 2)
+    tuple_with_slice = (slice(1, 2), 3)
 
     for i in hashable:
         assert inference.is_hashable(i)
+        assert inference.is_hashable(i, allow_slice=True)
+        assert inference.is_hashable(i, allow_slice=False)
     for i in not_hashable:
         assert not inference.is_hashable(i)
+        assert not inference.is_hashable(i, allow_slice=True)
+        assert not inference.is_hashable(i, allow_slice=False)
     for i in abc_hashable_not_really_hashable:
         assert not inference.is_hashable(i)
+        assert not inference.is_hashable(i, allow_slice=True)
+        assert not inference.is_hashable(i, allow_slice=False)
+
+    assert inference.is_hashable(hashable_slice)
+    assert inference.is_hashable(hashable_slice, allow_slice=True)
+    assert inference.is_hashable(hashable_slice, allow_slice=False)
+
+    # Built-in slice objects are hashable only on Python 3.12 and later.
+    if PY312:
+        for obj in [slice(1, 2), tuple_with_slice]:
+            assert inference.is_hashable(obj)
+            assert inference.is_hashable(obj, allow_slice=True)
+            assert not inference.is_hashable(obj, allow_slice=False)
+    else:
+        for obj in [slice(1, 2), tuple_with_slice]:
+            assert not inference.is_hashable(obj)
+            assert not inference.is_hashable(obj, allow_slice=True)
+            assert not inference.is_hashable(obj, allow_slice=False)
 
     # numpy.array is no longer collections.abc.Hashable as of
     # https://github.com/numpy/numpy/pull/5326, just test