Skip to content

Commit 213e50c

Browse files
author
Roline Stapny Saldanha
committed
BUG: Fix DataFrame.reindex and Series.reindex losing values when reindexing from a single Index to a MultiIndex
1 parent 8307c51 commit 213e50c

File tree

5 files changed

+19
-55
lines changed

5 files changed

+19
-55
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -788,8 +788,7 @@ MultiIndex
788788
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
789789
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
790790
- Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`)
791-
- Bug in :meth:`DataFrame.reindex` where reindexing a Dataframe with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`)
792-
- Bug in :meth:`Series.reindex` where reindexing a Series with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`)
791+
- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing from an :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN``; values are now preserved when the source index name matches a target level name (:issue:`60923`)
793792

794793
I/O
795794
^^^

pandas/core/generic.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5373,6 +5373,18 @@ def reindex(
53735373
"""
53745374
# TODO: Decide if we care about having different examples for different
53755375
# kinds
5376+
5377+
# Automatically detect matching level when reindexing from Index to MultiIndex.
5378+
# This prevents values from being incorrectly set to NaN when the source index
5379+
# name matches a level name in the target MultiIndex
5380+
if (
5381+
level is None
5382+
and index is not None
5383+
and isinstance(index, MultiIndex)
5384+
and not isinstance(self.index, MultiIndex)
5385+
and self.index.name in index.names
5386+
):
5387+
level = self.index.name
53765388
self._check_copy_deprecation(copy)
53775389

53785390
if index is not None and columns is not None and labels is not None:

pandas/core/series.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4886,15 +4886,6 @@ def reindex( # type: ignore[override]
48864886
limit: int | None = None,
48874887
tolerance=None,
48884888
) -> Series:
4889-
# Automatically detect matching level when reindexing from Index to MultiIndex.
4890-
# This prevents values from being incorrectly set to NaN when the source index
4891-
# name matches a level name in the target MultiIndex
4892-
if (
4893-
level is None
4894-
and isinstance(index, MultiIndex)
4895-
and self.index.name in index.names
4896-
):
4897-
level = self.index.name
48984889
return super().reindex(
48994890
index=index,
49004891
method=method,

pandas/tests/frame/methods/test_reindex.py

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,53 +1260,26 @@ def test_invalid_method(self):
12601260
df.reindex([1, 0, 2], method="asfreq")
12611261

12621262
def test_reindex_index_name_matches_multiindex_level(self):
1263-
"""
1264-
Test automatic level detection when reindexing from Index to MultiIndex.
1265-
When source index name matches a level name in target MultiIndex and level
1266-
is not specified, should behave same as if level was explicitly set.
1267-
"""
1268-
# Create source DataFrame with named Index
12691263
df = DataFrame(
12701264
{"value": [1, 2], "other": ["A", "B"]},
12711265
index=Index([10, 20], name="a"),
12721266
)
1273-
1274-
# Create target MultiIndex with matching level name
12751267
target = MultiIndex.from_product(
12761268
[[10, 20], ["x", "y"]],
1277-
names=["a", "b"], # 'a' matches source index name
1269+
names=["a", "b"],
12781270
)
12791271

12801272
result = df.reindex(index=target)
12811273
expected = df.reindex(index=target, level="a")
1282-
12831274
tm.assert_frame_equal(result, expected)
12841275

1285-
# Verify values are propagated correctly
1286-
expected_values = {
1287-
(10, "x"): {"value": 1, "other": "A"},
1288-
(10, "y"): {"value": 1, "other": "A"},
1289-
(20, "x"): {"value": 2, "other": "B"},
1290-
(20, "y"): {"value": 2, "other": "B"},
1291-
}
1292-
for idx, expected_row in expected_values.items():
1293-
for col, val in expected_row.items():
1294-
assert result.loc[idx, col] == val
1295-
12961276
def test_reindex_index_name_no_match_multiindex_level(self):
1297-
"""
1298-
Test reindexing behavior when source index name doesn't match any level
1299-
in target MultiIndex. Should fill with NaN since there's no level match.
1300-
"""
13011277
df = DataFrame({"value": [1, 2]}, index=Index([10, 20], name="different_name"))
1302-
13031278
target = MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"])
13041279

13051280
result = df.reindex(index=target)
1306-
1307-
# Should fill with NaN since no level match
1308-
assert result.isna().all().all()
1309-
1310-
# Verify shape is correct
1311-
assert result.index.equals(target)
1312-
assert result.columns.equals(df.columns)
1281+
expected = DataFrame(
1282+
{"value": [np.nan] * 4},
1283+
index=MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"]),
1284+
)
1285+
tm.assert_frame_equal(result, expected)

pandas/tests/series/methods/test_reindex.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -439,18 +439,12 @@ def test_reindex_expand_nonnano_nat(dtype):
439439
@pytest.mark.parametrize(
440440
"name, expected_match_level_a",
441441
[
442-
# Source index has matching name - should match level "a"
443442
("a", True),
444-
# Source index has no name - should not match any level
445443
(None, False),
446-
# Source index name doesn't match any level - should not match
447444
("x", False),
448445
],
449446
)
450447
def test_reindex_multiindex_automatic_level(name, expected_match_level_a):
451-
"""
452-
Test automatic level detection when reindexing from Index to MultiIndex.
453-
"""
454448
series = Series([26.73, 24.255], index=Index([81, 82], name=name))
455449
target = MultiIndex.from_product(
456450
[[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"]
@@ -459,19 +453,14 @@ def test_reindex_multiindex_automatic_level(name, expected_match_level_a):
459453
result = series.reindex(target)
460454

461455
if expected_match_level_a:
462-
# Should match behavior of explicit level="a"
463456
expected = series.reindex(target, level="a")
464457
else:
465-
# Should contain all NaN values
466458
expected = Series(np.nan, index=target, dtype=series.dtype)
467459

468460
tm.assert_series_equal(result, expected)
469461

470462

471463
def test_reindex_multiindex_explicit_level_overrides():
472-
"""
473-
Test that explicit level parameter overrides automatic detection.
474-
"""
475464
series = Series([26.73, 24.255], index=Index([81, 82], name="a"))
476465
target = MultiIndex.from_product(
477466
[[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"]

0 commit comments

Comments
 (0)