Commit 117175b

Revert old approach: remove min/max methods from ArrowExtensionArray
1 parent dd936ad commit 117175b

4 files changed: +49 additions, -87 deletions

pandas/core/arrays/arrow/array.py

Lines changed: 47 additions & 68 deletions
@@ -12,7 +12,6 @@
     overload,
 )
 import unicodedata
-import warnings

 import numpy as np

@@ -23,12 +22,11 @@
     timezones,
 )
 from pandas.compat import (
-    HAS_PYARROW,
-    pa_version_under12p1,
+    pa_version_under10p1,
+    pa_version_under11p0,
     pa_version_under13p0,
 )
 from pandas.util._decorators import doc
-from pandas.util._exceptions import find_stack_level

 from pandas.core.dtypes.cast import (
     can_hold_element,
@@ -65,7 +63,6 @@
 from pandas.core.arrays.masked import BaseMaskedArray
 from pandas.core.arrays.string_ import StringDtype
 import pandas.core.common as com
-from pandas.core.construction import extract_array
 from pandas.core.indexers import (
     check_array_indexer,
     unpack_tuple_and_ellipses,
@@ -77,7 +74,7 @@
 from pandas.io._util import _arrow_dtype_mapping
 from pandas.tseries.frequencies import to_offset

-if HAS_PYARROW:
+if not pa_version_under10p1:
     import pyarrow as pa
     import pyarrow.compute as pc

@@ -211,6 +208,16 @@ def floordiv_compat(
     from pandas.core.arrays.timedeltas import TimedeltaArray


+def get_unit_from_pa_dtype(pa_dtype) -> str:
+    # https://github.com/pandas-dev/pandas/pull/50998#discussion_r1100344804
+    if pa_version_under11p0:
+        unit = str(pa_dtype).split("[", 1)[-1][:-1]
+        if unit not in ["s", "ms", "us", "ns"]:
+            raise ValueError(pa_dtype)
+        return unit
+    return pa_dtype.unit
+
+
 def to_pyarrow_type(
     dtype: ArrowDtype | pa.DataType | Dtype | None,
 ) -> pa.DataType | None:
@@ -293,7 +300,7 @@ class ArrowExtensionArray(
     _dtype: ArrowDtype

     def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
-        if pa_version_under12p1:
+        if pa_version_under10p1:
             msg = "pyarrow>=10.0.1 is required for PyArrow backed ArrowExtensionArray."
             raise ImportError(msg)
         if isinstance(values, pa.Array):
@@ -503,33 +510,6 @@ def _box_pa_array(
                 value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit)
                 value = value.to_numpy()

-        if pa_type is not None and pa.types.is_timestamp(pa_type):
-            # Use DatetimeArray to exclude Decimal(NaN) (GH#61774) and
-            # ensure constructor treats tznaive the same as non-pyarrow
-            # dtypes (GH#61775)
-            from pandas.core.arrays.datetimes import (
-                DatetimeArray,
-                tz_to_dtype,
-            )
-
-            pass_dtype = tz_to_dtype(tz=pa_type.tz, unit=pa_type.unit)
-            value = extract_array(value, extract_numpy=True)
-            if isinstance(value, DatetimeArray):
-                dta = value
-            else:
-                dta = DatetimeArray._from_sequence(
-                    value, copy=copy, dtype=pass_dtype
-                )
-            dta_mask = dta.isna()
-            value_i8 = cast("npt.NDArray", dta.view("i8"))
-            if not value_i8.flags["WRITEABLE"]:
-                # e.g. test_setitem_frame_2d_values
-                value_i8 = value_i8.copy()
-                dta = DatetimeArray._from_sequence(value_i8, dtype=dta.dtype)
-            value_i8[dta_mask] = 0  # GH#61776 avoid __sub__ overflow
-            pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask)
-            return pa_array
-
         try:
             pa_array = pa.array(value, type=pa_type, from_pandas=True)
         except (pa.ArrowInvalid, pa.ArrowTypeError):
@@ -854,25 +834,6 @@ def _logical_method(self, other, op) -> Self:
         # integer types. Otherwise these are boolean ops.
         if pa.types.is_integer(self._pa_array.type):
             return self._evaluate_op_method(other, op, ARROW_BIT_WISE_FUNCS)
-        elif (
-            (
-                pa.types.is_string(self._pa_array.type)
-                or pa.types.is_large_string(self._pa_array.type)
-            )
-            and op in (roperator.ror_, roperator.rand_, roperator.rxor)
-            and isinstance(other, np.ndarray)
-            and other.dtype == bool
-        ):
-            # GH#60234 backward compatibility for the move to StringDtype in 3.0
-            op_name = op.__name__[1:].strip("_")
-            warnings.warn(
-                f"'{op_name}' operations between boolean dtype and {self.dtype} are "
-                "deprecated and will raise in a future version. Explicitly "
-                "cast the strings to a boolean dtype before operating instead.",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            return op(other, self.astype(bool))
         else:
             return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)

@@ -1238,6 +1199,10 @@ def factorize(
         null_encoding = "mask" if use_na_sentinel else "encode"

         data = self._pa_array
+        pa_type = data.type
+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
+            data = data.cast(pa.int64())

         if pa.types.is_dictionary(data.type):
             if null_encoding == "encode":
@@ -1262,6 +1227,8 @@ def factorize(
         )
         uniques = type(self)(combined.dictionary)

+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            uniques = cast(ArrowExtensionArray, uniques.astype(self.dtype))
         return indices, uniques

     def reshape(self, *args, **kwargs):
@@ -1548,7 +1515,19 @@ def unique(self) -> Self:
         -------
         ArrowExtensionArray
         """
-        pa_result = pc.unique(self._pa_array)
+        pa_type = self._pa_array.type
+
+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
+            data = self._pa_array.cast(pa.int64())
+        else:
+            data = self._pa_array
+
+        pa_result = pc.unique(data)
+
+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            pa_result = pa_result.cast(pa_type)
+
         return type(self)(pa_result)

     def value_counts(self, dropna: bool = True) -> Series:
@@ -1568,12 +1547,18 @@ def value_counts(self, dropna: bool = True) -> Series:
         --------
         Series.value_counts
         """
+        pa_type = self._pa_array.type
+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            # https://github.com/apache/arrow/issues/15226#issuecomment-1376578323
+            data = self._pa_array.cast(pa.int64())
+        else:
+            data = self._pa_array
+
         from pandas import (
             Index,
             Series,
         )

-        data = self._pa_array
         vc = data.value_counts()

         values = vc.field(0)
@@ -1583,6 +1568,9 @@
             values = values.filter(mask)
             counts = counts.filter(mask)

+        if pa_version_under11p0 and pa.types.is_duration(pa_type):
+            values = values.cast(pa_type)
+
         counts = ArrowExtensionArray(counts)

         index = Index(type(self)(values))
@@ -1876,7 +1864,8 @@ def pyarrow_meth(data, skip_nulls, min_count=0): # type: ignore[misc]
         if pa.types.is_duration(pa_type):
             result = result.cast(pa_type)
         elif pa.types.is_time(pa_type):
-            result = result.cast(pa.duration(pa_type.unit))
+            unit = get_unit_from_pa_dtype(pa_type)
+            result = result.cast(pa.duration(unit))
         elif pa.types.is_date(pa_type):
             # go with closest available unit, i.e. "s"
             result = result.cast(pa.duration("s"))
@@ -1957,10 +1946,8 @@ def _explode(self):
         fill_value = pa.scalar([None], type=self._pa_array.type)
         mask = counts == 0
         if mask.any():
-            # pc.if_else here is similar to `values[mask] = fill_value`
-            # but this avoids an object-dtype round-trip.
-            pa_values = pc.if_else(~mask, values._pa_array, fill_value)
-            values = type(self)(pa_values)
+            values = values.copy()
+            values[mask] = fill_value
             counts = counts.copy()
             counts[mask] = 1
         values = values.fillna(fill_value)
@@ -2969,14 +2956,6 @@ def _dt_tz_convert(self, tz) -> Self:
         result = self._pa_array.cast(pa.timestamp(current_unit, tz))
         return type(self)(result)

-    def max(self, *, skipna: bool = True, axis: int | None = 0, **kwargs):
-        """Return the maximum value of the array."""
-        return self._reduce("max", skipna=skipna, **kwargs)
-
-    def min(self, *, skipna: bool = True, axis: int | None = 0, **kwargs):
-        """Return the minimum value of the array."""
-        return self._reduce("min", skipna=skipna, **kwargs)
-


 def transpose_homogeneous_pyarrow(
     arrays: Sequence[ArrowExtensionArray],

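The get_unit_from_pa_dtype helper restored above matters on pyarrow < 11, where it parses the unit out of the dtype's string form instead of reading pa_dtype.unit; the reduction path then uses it when casting time results to durations. A small illustration of that parsing, assuming a recent pyarrow is installed (the variable names here are illustrative):

import pyarrow as pa

# The pre-pyarrow-11 branch slices the unit out of the dtype's repr,
# e.g. "time64[us]" -> "us"; newer pyarrow exposes it as .unit directly.
pa_dtype = pa.time64("us")
assert str(pa_dtype) == "time64[us]"
assert str(pa_dtype).split("[", 1)[-1][:-1] == "us"
assert pa_dtype.unit == "us"
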
pandas/core/indexing.py

Lines changed: 1 addition & 1 deletion
@@ -1612,7 +1612,7 @@ def _validate_key(self, key, axis: AxisInt) -> None:

             if len(arr):
                 # convert to numpy array for min/max with ExtensionArrays
-                if hasattr(arr, 'to_numpy'):
+                if hasattr(arr, "to_numpy"):
                     np_arr = arr.to_numpy()
                 else:
                     np_arr = np.asarray(arr)
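
The branch touched here (only the quote style changes) converts ExtensionArray-backed positional keys to numpy before taking min/max, which is presumably why ArrowExtensionArray no longer needs its own wrappers. A standalone sketch of that fallback, assuming pyarrow is installed; indexer_bounds is an illustrative name, not a pandas function:

import numpy as np
import pandas as pd

def indexer_bounds(arr):
    # Same pattern as the diff above: prefer to_numpy() when the key is an
    # ExtensionArray (e.g. ArrowExtensionArray), otherwise coerce via asarray.
    if hasattr(arr, "to_numpy"):
        np_arr = arr.to_numpy()
    else:
        np_arr = np.asarray(arr)
    return np_arr.min(), np_arr.max()

key = pd.array([0, 2], dtype="int64[pyarrow]")
assert indexer_bounds(key) == (0, 2)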

pandas/tests/arrays/test_array.py

Lines changed: 0 additions & 13 deletions
@@ -530,16 +530,3 @@ def test_array_to_numpy_na():
     result = arr.to_numpy(na_value=True, dtype=bool)
     expected = np.array([True, True])
     tm.assert_numpy_array_equal(result, expected)
-
-
-def test_array_max_min():
-    pytest.importorskip("pyarrow")
-    # GH#61311
-    df = pd.DataFrame({"a": [1, 2], "c": [0, 2], "d": ["c", "a"]})
-    expected = df.iloc[:, df["c"]]
-    df_pyarrow = pd.DataFrame(
-        {"a": [1, 2], "c": [0, 2], "d": ["c", "a"]}
-    ).convert_dtypes(dtype_backend="pyarrow")
-    result = df_pyarrow.iloc[:, df_pyarrow["c"]]
-    expected_pyarrow = expected.convert_dtypes(dtype_backend="pyarrow")
-    tm.assert_frame_equal(result, expected_pyarrow)
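
The deleted test_array_max_min covered GH#61311 (positional column selection keyed by a pyarrow-backed integer column); the same scenario remains covered by test_iloc_arrow_extension_array below. What it asserted, as a quick sketch assuming pyarrow is installed:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "c": [0, 2], "d": ["c", "a"]})
df_pyarrow = df.convert_dtypes(dtype_backend="pyarrow")

# Selecting columns by position with an arrow-backed integer key should
# match the numpy-backed result.
result = df_pyarrow.iloc[:, df_pyarrow["c"]]
assert list(result.columns) == ["a", "d"]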

pandas/tests/indexing/test_iloc.py

Lines changed: 1 addition & 5 deletions
@@ -1487,11 +1487,7 @@ def test_iloc_arrow_extension_array(self):
         # GH#61311
         pytest.importorskip("pyarrow")

-        df = DataFrame({
-            "a": [1, 2],
-            "c": [0, 2],
-            "d": ["c", "a"]
-        })
+        df = DataFrame({"a": [1, 2], "c": [0, 2], "d": ["c", "a"]})

         df_arrow = df.convert_dtypes(dtype_backend="pyarrow")
         result = df_arrow.iloc[:, df_arrow["c"]]
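
Since the reverted min/max wrappers only delegated to _reduce, reductions on pyarrow-backed data are still expected to flow through the normal Series path. A final sanity check, assuming pyarrow is installed:

import pandas as pd

# Series-level reductions dispatch to ArrowExtensionArray._reduce, so the
# array class does not need dedicated min/max methods.
ser = pd.Series([1, 2, None], dtype="int64[pyarrow]")
assert ser.min() == 1
assert ser.max() == 2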
