Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ Other enhancements
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrame.rank` now preserves the ``dtype_backend`` for extension arrays (:issue:`52829`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
Expand Down
1 change: 0 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,7 +1091,6 @@ def rank(
)
else:
raise TypeError("Array with ndim > 2 are not supported.")

return ranks


Expand Down
2 changes: 0 additions & 2 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2414,8 +2414,6 @@ def _rank(
"""
See Series.rank.__doc__.
"""
if axis != 0:
raise NotImplementedError
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have cases that get here?

Copy link
Contributor Author

@sharkipelago sharkipelago Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Besides the two 2D array tests I just added, test_rank2() also raises that NotImplementedError when that line is there:

=============================================== short test summary info ================================================
FAILED pandas/tests/frame/methods/test_rank.py::TestRank::test_rank2 - NotImplementedError
FAILED pandas/tests/frame/methods/test_rank.py::TestRank::test_2d_extension_array_datetime - NotImplementedError
FAILED pandas/tests/frame/methods/test_rank.py::TestRank::test_2d_extension_array_timedelta - NotImplementedError
============================================ 3 failed, 131 passed in 5.58s =============================================

Copy link
Contributor Author

@sharkipelago sharkipelago Sep 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the third "sub test" in test_rank2:

df = DataFrame([["b", "c", "a"], ["a", "c", "b"]])
expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]])
result = df.rank(1, numeric_only=False)


return rank(
self,
Expand Down
34 changes: 19 additions & 15 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9276,16 +9276,11 @@ def rank(
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
raise ValueError(msg)

def ranker(data):
if data.ndim == 2:
# i.e. DataFrame, we cast to ndarray
values = data.values
else:
# i.e. Series, can dispatch to EA
values = data._values

if isinstance(values, ExtensionArray):
ranks = values._rank(
def ranker(blk_values):
if axis_int == 0:
blk_values = blk_values.T
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can avoid transposing at the block level and only transpose at the DataFrame level (L9317), then pass axis=blk_values.ndim-1 (which will require updating the EA method).

if isinstance(blk_values, ExtensionArray):
ranks = blk_values._rank(
axis=axis_int,
method=method,
ascending=ascending,
Expand All @@ -9294,16 +9289,16 @@ def ranker(data):
)
else:
ranks = algos.rank(
values,
blk_values,
axis=axis_int,
method=method,
ascending=ascending,
na_option=na_option,
pct=pct,
)

ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
return ranks_obj.__finalize__(self, method="rank")
if axis_int == 0:
ranks = ranks.T
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this transpose should also be avoidable

return ranks

if numeric_only:
if self.ndim == 1 and not is_numeric_dtype(self.dtype):
Expand All @@ -9316,7 +9311,16 @@ def ranker(data):
else:
data = self

return ranker(data)
should_transpose = axis_int == 1

if should_transpose:
data = data.T
applied = data._mgr.apply(ranker)
result = self._constructor_from_mgr(applied, axes=applied.axes)
if should_transpose:
result = result.T

return result.__finalize__(self, method="rank")

@doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
def compare(
Expand Down
96 changes: 96 additions & 0 deletions pandas/tests/frame/methods/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@
Infinity,
NegInfinity,
)
import pandas.util._test_decorators as td

from pandas import (
DataFrame,
Index,
Series,
to_datetime,
to_timedelta,
)
import pandas._testing as tm

Expand Down Expand Up @@ -498,3 +501,96 @@ def test_rank_string_dtype(self, string_dtype_no_object):
exp_dtype = "float64"
expected = Series([1, 2, None, 3], dtype=exp_dtype)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
    "method,og_dtype,expected_dtype",
    [
        ("average", "UInt32", "Float64"),
        ("average", "Float32", "Float64"),
        pytest.param(
            "average",
            "int32[pyarrow]",
            "double[pyarrow]",
            marks=td.skip_if_no("pyarrow"),
        ),
        ("min", "Int32", "UInt64"),
        ("min", "Float32", "UInt64"),
        pytest.param(
            "min",
            "int32[pyarrow]",
            "uint64[pyarrow]",
            marks=td.skip_if_no("pyarrow"),
        ),
    ],
)
def test_rank_extension_array_dtype(self, method, og_dtype, expected_dtype):
    """
    Rank a single-column frame built with ``og_dtype`` and check the
    result against a frame constructed with ``expected_dtype``.

    The expected ranks are written as floats for ``method="average"`` and
    as integers for every other parametrized method.
    """
    # GH#52829
    frame = DataFrame([4, 89, 33], dtype=og_dtype)
    ranked = frame.rank(method=method)

    rank_values = [1.0, 3.0, 2.0] if method == "average" else [1, 3, 2]
    expected = DataFrame(rank_values, dtype=expected_dtype)

    tm.assert_frame_equal(ranked, expected)

def test_rank_mixed_extension_array_dtype(self):
    """
    Rank a frame that mixes a default-dtype column with an
    ``int32[pyarrow]`` column using ``method="min"``.

    The expectation pins a ``float64`` result for the default column and
    a ``uint64[pyarrow]`` result for the pyarrow column.
    """
    # GH#52829
    pytest.importorskip("pyarrow")

    mixed = DataFrame(
        {
            "base": Series([4, 5, 6]),
            "pyarrow": Series([7, 8, 9], dtype="int32[pyarrow]"),
        }
    )
    ranked = mixed.rank(method="min")

    base_ranks = Series([1.0, 2.0, 3.0], dtype="float64")
    pyarrow_ranks = Series([1, 2, 3], dtype="uint64[pyarrow]")
    expected = DataFrame({"base": base_ranks, "pyarrow": pyarrow_ranks})

    tm.assert_frame_equal(ranked, expected)

def test_2d_extension_array_datetime(self):
# GH#52829
df = DataFrame(
{
"year": to_datetime(["2012-1-1", "2013-1-1", "2014-1-1"]),
"week": to_datetime(["2012-1-2", "2012-1-9", "2012-1-16"]),
"day": to_datetime(["2012-1-3", "2012-1-4", "2012-1-5"]),
}
)
axis0_expected = DataFrame(
{"year": [1.0, 2.0, 3.0], "week": [1.0, 2.0, 3.0], "day": [1.0, 2.0, 3.0]}
)
axis1_expected = DataFrame(
{"year": [1.0, 3.0, 3.0], "week": [2.0, 2.0, 2.0], "day": [3.0, 1.0, 1.0]}
)
tm.assert_frame_equal(df.rank(), axis0_expected)
tm.assert_frame_equal(df.rank(1), axis1_expected)

def test_2d_extension_array_timedelta(self):
# GH#52829
df = DataFrame(
{
"day": to_timedelta(["0 days", "1 day", "2 days"]),
"hourly": to_timedelta(["23 hours", "24 hours", "25 hours"]),
"minute": to_timedelta(
["1439 minutes", "1440 minutes", "1441 minutes"]
),
}
)
axis0_expected = DataFrame(
{
"day": [1.0, 2.0, 3.0],
"hourly": [1.0, 2.0, 3.0],
"minute": [1.0, 2.0, 3.0],
}
)
axis1_expected = DataFrame(
{
"day": [1.0, 2.0, 3.0],
"hourly": [2.0, 2.0, 2.0],
"minute": [3.0, 2.0, 1.0],
}
)
tm.assert_frame_equal(df.rank(), axis0_expected)
tm.assert_frame_equal(df.rank(1), axis1_expected)
Loading