pandas-dev · sharkipelago · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -204,6 +204,7 @@ Other enhancements
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
 - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
+- :meth:`DataFrame.rank` now preserves the ``dtype_backend`` of extension-array columns (e.g. nullable and PyArrow-backed dtypes) instead of casting them to NumPy arrays (:issue:`52829`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -1091,7 +1091,6 @@ def rank(
         )
     else:
         raise TypeError("Array with ndim > 2 are not supported.")
-
     return ranks
 
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -2414,8 +2414,6 @@ def _rank(
         """
         See Series.rank.__doc__.
         """
-        if axis != 0:
-            raise NotImplementedError
 
         return rank(
             self,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -9276,16 +9276,11 @@ def rank(
             msg = "na_option must be one of 'keep', 'top', or 'bottom'"
             raise ValueError(msg)
 
-        def ranker(data):
-            if data.ndim == 2:
-                # i.e. DataFrame, we cast to ndarray
-                values = data.values
-            else:
-                # i.e. Series, can dispatch to EA
-                values = data._values
-
-            if isinstance(values, ExtensionArray):
-                ranks = values._rank(
+        def ranker(blk_values):
+            if axis_int == 0:
+                blk_values = blk_values.T
+            if isinstance(blk_values, ExtensionArray):
+                ranks = blk_values._rank(
                     axis=axis_int,
                     method=method,
                     ascending=ascending,
@@ -9294,16 +9289,16 @@ def ranker(data):
                 )
             else:
                 ranks = algos.rank(
-                    values,
+                    blk_values,
                     axis=axis_int,
                     method=method,
                     ascending=ascending,
                     na_option=na_option,
                     pct=pct,
                 )
-
-            ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
-            return ranks_obj.__finalize__(self, method="rank")
+            if axis_int == 0:
+                ranks = ranks.T
+            return ranks
 
         if numeric_only:
             if self.ndim == 1 and not is_numeric_dtype(self.dtype):
@@ -9316,7 +9311,16 @@ def ranker(data):
         else:
             data = self
 
-        return ranker(data)
+        should_transpose = axis_int == 1
+
+        if should_transpose:
+            data = data.T
+        applied = data._mgr.apply(ranker)
+        result = self._constructor_from_mgr(applied, axes=applied.axes)
+        if should_transpose:
+            result = result.T
+
+        return result.__finalize__(self, method="rank")
 
     @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
     def compare(

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
@@ -10,11 +10,14 @@
     Infinity,
     NegInfinity,
 )
+import pandas.util._test_decorators as td
 
 from pandas import (
     DataFrame,
     Index,
     Series,
+    to_datetime,
+    to_timedelta,
 )
 import pandas._testing as tm
 
@@ -498,3 +501,96 @@ def test_rank_string_dtype(self, string_dtype_no_object):
             exp_dtype = "float64"
         expected = Series([1, 2, None, 3], dtype=exp_dtype)
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "method,og_dtype,expected_dtype",
+        [
+            ("average", "UInt32", "Float64"),
+            ("average", "Float32", "Float64"),
+            pytest.param(
+                "average",
+                "int32[pyarrow]",
+                "double[pyarrow]",
+                marks=td.skip_if_no("pyarrow"),
+            ),
+            ("min", "Int32", "UInt64"),
+            ("min", "Float32", "UInt64"),
+            pytest.param(
+                "min",
+                "int32[pyarrow]",
+                "uint64[pyarrow]",
+                marks=td.skip_if_no("pyarrow"),
+            ),
+        ],
+    )
+    def test_rank_extension_array_dtype(self, method, og_dtype, expected_dtype):
+        # GH#52829 rank must return an extension dtype matching the input's backend
+        result = DataFrame([4, 89, 33], dtype=og_dtype).rank(method=method)
+        if method == "average":  # average ranks are expected as floats
+            expected = DataFrame([1.0, 3.0, 2.0], dtype=expected_dtype)
+        else:  # "min" ranks are expected as unsigned integers
+            expected = DataFrame([1, 3, 2], dtype=expected_dtype)
+        tm.assert_frame_equal(result, expected)
+
+    def test_rank_mixed_extension_array_dtype(self):
+        # GH#52829 default-dtype column ranks to float64, pyarrow column stays pyarrow
+        pytest.importorskip("pyarrow")
+        result = DataFrame(
+            {
+                "base": Series([4, 5, 6]),
+                "pyarrow": Series([7, 8, 9], dtype="int32[pyarrow]"),
+            }
+        ).rank(method="min")
+        expected = DataFrame(
+            {
+                "base": Series([1.0, 2.0, 3.0], dtype="float64"),
+                "pyarrow": Series([1, 2, 3], dtype="uint64[pyarrow]"),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_2d_extension_array_datetime(self):
+        # GH#52829 all-datetime frame: ranks come back as plain floats on both axes
+        df = DataFrame(
+            {
+                "year": to_datetime(["2012-1-1", "2013-1-1", "2014-1-1"]),
+                "week": to_datetime(["2012-1-2", "2012-1-9", "2012-1-16"]),
+                "day": to_datetime(["2012-1-3", "2012-1-4", "2012-1-5"]),
+            }
+        )
+        axis0_expected = DataFrame(
+            {"year": [1.0, 2.0, 3.0], "week": [1.0, 2.0, 3.0], "day": [1.0, 2.0, 3.0]}
+        )
+        axis1_expected = DataFrame(
+            {"year": [1.0, 3.0, 3.0], "week": [2.0, 2.0, 2.0], "day": [3.0, 1.0, 1.0]}
+        )
+        tm.assert_frame_equal(df.rank(), axis0_expected)  # default: rank each column
+        tm.assert_frame_equal(df.rank(1), axis1_expected)  # positional 1 is axis=1
+
+    def test_2d_extension_array_timedelta(self):
+        # GH#52829 all-timedelta frame: ranks come back as plain floats on both axes
+        df = DataFrame(
+            {
+                "day": to_timedelta(["0 days", "1 day", "2 days"]),
+                "hourly": to_timedelta(["23 hours", "24 hours", "25 hours"]),
+                "minute": to_timedelta(
+                    ["1439 minutes", "1440 minutes", "1441 minutes"]
+                ),
+            }
+        )
+        axis0_expected = DataFrame(
+            {
+                "day": [1.0, 2.0, 3.0],
+                "hourly": [1.0, 2.0, 3.0],
+                "minute": [1.0, 2.0, 3.0],
+            }
+        )
+        axis1_expected = DataFrame(
+            {
+                "day": [1.0, 2.0, 3.0],
+                "hourly": [2.0, 2.0, 2.0],
+                "minute": [3.0, 2.0, 1.0],
+            }
+        )
+        tm.assert_frame_equal(df.rank(), axis0_expected)  # default: rank each column
+        tm.assert_frame_equal(df.rank(1), axis1_expected)  # positional 1 is axis=1
-Original file line number
+Diff line change
@@ Expand Up / @@ -1091,7 +1091,6 @@ def rank( @@
             )
         else:
             raise TypeError("Array with ndim > 2 are not supported.")
         return ranks
@@ Expand Down @@