From 6a2568a0f856bca3384931245b578733e6ce83e3 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Tue, 30 Sep 2025 21:57:42 -0400 Subject: [PATCH 1/5] stricter edge case checking --- pandas/core/frame.py | 4 ++++ pandas/tests/indexing/multiindex/test_multiindex.py | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 91f5cd1679a61..aa1cbb94099e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4454,8 +4454,12 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: cols_droplevel = maybe_droplevels(cols, key) if ( not isinstance(cols_droplevel, MultiIndex) + and cols_droplevel.dtype.type is str and not cols_droplevel.any() ): + # if cols_droplevel contains only empty strings, + # value.reindex(cols_droplevel, axis=1) would be full of NaNs + # see GH#62518 and GH#61841 return if len(cols_droplevel) and not cols_droplevel.equals(value.columns): value = value.reindex(cols_droplevel, axis=1) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 2fe8f9affba2d..120ec4c47c46e 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -271,3 +271,12 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self): df1["C"] = s1 tm.assert_frame_equal(df1, df2) tm.assert_frame_equal(df1, df3) + + # GH 62518 + meta = DataFrame( + columns=MultiIndex.from_arrays( + [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])], + ), + dtype=object, + ) + meta["z"] = meta["z"].astype("int64") From c6d65bdc2b37be37b1f44e021bb68dd612837257 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Tue, 30 Sep 2025 23:46:28 -0400 Subject: [PATCH 2/5] use consistent naming in test --- pandas/tests/indexing/multiindex/test_multiindex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 120ec4c47c46e..42491dae62de4 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -273,10 +273,10 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self): tm.assert_frame_equal(df1, df3) # GH 62518 - meta = DataFrame( + df4 = DataFrame( columns=MultiIndex.from_arrays( [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])], ), dtype=object, ) - meta["z"] = meta["z"].astype("int64") + df4["z"] = df4["z"].astype("int64") From 968cdeb6f5b8c67b3f30c429435d2ac623021d54 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Tue, 30 Sep 2025 23:58:24 -0400 Subject: [PATCH 3/5] fix string dtype check --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa1cbb94099e3..694ff60166d43 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -107,6 +107,7 @@ is_list_like, is_scalar, is_sequence, + is_string_dtype, needs_i8_conversion, pandas_dtype, ) @@ -4454,7 +4455,7 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: cols_droplevel = maybe_droplevels(cols, key) if ( not isinstance(cols_droplevel, MultiIndex) - and cols_droplevel.dtype.type is str + and is_string_dtype(cols_droplevel.dtype) and not cols_droplevel.any() ): # if cols_droplevel contains only empty strings, From c30cf1d09e4f31c297d2d1b429ee0c4e7039aa9e Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Wed, 1 Oct 2025 12:22:28 -0400 Subject: [PATCH 4/5] add separate test for #62518 --- pandas/tests/indexing/multiindex/test_multiindex.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 42491dae62de4..299799efa23a9 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -272,11 +272,12 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self): tm.assert_frame_equal(df1, df2) tm.assert_frame_equal(df1, df3) + def test_multiindex_assign_alignment_with_non_string_dtype(self): # GH 62518 - df4 = DataFrame( + meta = DataFrame( columns=MultiIndex.from_arrays( - [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])], + [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])] ), dtype=object, ) - df4["z"] = df4["z"].astype("int64") + meta["z"] = meta["z"].astype("int64") From 5dd2bb472b770fdb96712b1947d5f04748e7eb30 Mon Sep 17 00:00:00 2001 From: Matias Lindgren Date: Wed, 1 Oct 2025 14:07:50 -0400 Subject: [PATCH 5/5] assert result after assignment --- .../indexing/multiindex/test_multiindex.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py index 299799efa23a9..35ef7625b6e76 100644 --- a/pandas/tests/indexing/multiindex/test_multiindex.py +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -274,10 +274,21 @@ def test_multiindex_assign_aligns_as_implicit_tuple(self): def test_multiindex_assign_alignment_with_non_string_dtype(self): # GH 62518 - meta = DataFrame( - columns=MultiIndex.from_arrays( - [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])] - ), - dtype=object, + columns = MultiIndex.from_arrays( + [["a", "a", "z", "z"], pd.Categorical([1, 2, 1, 2])] ) + + meta = DataFrame(columns=columns, dtype=object) meta["z"] = meta["z"].astype("int64") + + result = DataFrame( + data={ + ("a", 1): Series([], dtype=object), + ("a", 2): Series([], dtype=object), + ("z", 1): Series([], dtype="int64"), + ("z", 2): Series([], dtype="int64"), + }, + columns=columns, + ) + + tm.assert_frame_equal(meta, result)