diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a619b08a8676f..348560bec8cb1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -969,6 +969,8 @@ Indexing - Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`) - Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`) - Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`) +- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`) +- Missing ^^^^^^^ @@ -1137,6 +1139,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.map` with a ``timestamp[pyarrow]`` dtype or ``duration[pyarrow]`` dtype incorrectly returning all-``NaN`` entries (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` throwing ``ValueError`` when ``regex=True`` and all NA values. (:issue:`60688`) @@ -1151,6 +1154,7 @@ Other - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) - Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`) +- .. ***DO NOT USE THIS SECTION*** diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0087a8da49857..653a900fbfe45 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1616,6 +1616,10 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) else: + # For "mM" cases, the super() method passes `self` without the + # to_numpy call, which inside map_array casts to ndarray[object]. + # Without the to_numpy() call, NA is preserved instead of changed + # to None. return super().map(mapper, na_action) @doc(ExtensionArray.duplicated) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ce6ea1ed980dd..c2c0b1ba55b6c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -33,7 +33,10 @@ ) from pandas.core.dtypes.common import is_scalar -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import ( + ArrowDtype, + DatetimeTZDtype, +) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_na_for_dtype @@ -384,6 +387,16 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ Can we compare values of the given dtype to our own? """ + if isinstance(dtype, ArrowDtype): + # GH#62277 + if dtype.kind != "M": + return False + + pa_dtype = dtype.pyarrow_dtype + if (pa_dtype.tz is None) ^ (self.tz is None): + return False + return True + if self.tz is not None: # If we have tz, we can compare to tzaware return isinstance(dtype, DatetimeTZDtype) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index fa3de46621643..ff25260c0391e 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -19,6 +19,7 @@ is_scalar, pandas_dtype, ) +from pandas.core.dtypes.dtypes import ArrowDtype from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays.timedeltas import TimedeltaArray @@ -194,6 +195,8 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: """ Can we compare values of the given dtype to our own? """ + if isinstance(dtype, ArrowDtype): + return dtype.kind == "m" return lib.is_np_dtype(dtype, "m") # aka self._data._is_recognized_dtype # ------------------------------------------------------------------- diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index c44345273466c..5877a38bbee11 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -10,6 +10,7 @@ from pandas._libs import index as libindex from pandas.compat.numpy import np_long +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -513,6 +514,26 @@ def test_contains_nonunique(self, vals): class TestGetIndexer: + @td.skip_if_no("pyarrow") + @pytest.mark.parametrize("as_td", [True, False]) + def test_get_indexer_pyarrow(self, as_td): + # GH#62277 + index = date_range("2016-01-01", periods=3) + target = index.astype("timestamp[ns][pyarrow]")[::-1] + if as_td: + # Test duration dtypes while we're here + index = index - index[0] + target = target - target[-1] + + result = index.get_indexer(target) + + expected = np.array([2, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + # Reversed op should work the same + result2 = target.get_indexer(index) + tm.assert_numpy_array_equal(result2, expected) + def test_get_indexer_date_objs(self): rng = date_range("1/1/2000", periods=20) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 104f779d2d213..c641684719060 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -9,6 +9,7 @@ import pytest from pandas.errors import Pandas4Warning +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -653,3 +654,26 @@ def test_map_engine_not_executor(): with pytest.raises(ValueError, match="Not a valid engine: 'something'"): s.map(lambda x: x, engine="something") + + +@td.skip_if_no("pyarrow") +@pytest.mark.parametrize("as_td", [True, False]) +def test_map_pyarrow_timestamp(as_td): + # GH#61231 + dti = date_range("2018-01-01 00:00:00", "2018-01-07 00:00:00") + ser = Series(dti, dtype="timestamp[ns][pyarrow]", name="a") + if as_td: + # duration dtype + ser = ser - ser[0] + + mapper = {date: i for i, date in enumerate(ser)} + + res_series = ser.map(mapper) + expected = Series(range(len(ser)), name="a", dtype="int64") + tm.assert_series_equal(res_series, expected) + + res_index = Index(ser).map(mapper) + # For now (as of 2025-09-06) at least, we do inference on Index.map that + # we don't for Series.map + expected_index = Index(expected).astype("int64[pyarrow]") + tm.assert_index_equal(res_index, expected_index)