Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,7 @@ Reshaping
- Bug in :meth:`DataFrame.stack` with the new implementation where ``ValueError`` is raised when ``level=[]`` (:issue:`60740`)
- Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtensionDtype` (:issue:`59123`)
- Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)

Sparse
^^^^^^
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3062,13 +3062,16 @@ def renamer(x, suffix: str | None):
if not llabels.is_unique:
# Only warn when duplicates are caused because of suffixes, already duplicated
# columns in origin should not warn
dups = llabels[(llabels.duplicated()) & (~left.duplicated())].tolist()
dups.extend(llabels[(llabels.duplicated()) & (~left.duplicated())].tolist())
if not rlabels.is_unique:
dups.extend(rlabels[(rlabels.duplicated()) & (~right.duplicated())].tolist())
# Suffix addition creates duplicate to pre-existing column name
dups.extend(llabels.intersection(right.difference(to_rename)).tolist())
dups.extend(rlabels.intersection(left.difference(to_rename)).tolist())
if dups:
raise MergeError(
f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
f"not allowed.",
"not allowed.",
)

return llabels, rlabels
9 changes: 9 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -3060,3 +3060,12 @@ def test_merge_on_all_nan_column():
{"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan], "z": [4, 5, 6], "zz": [4, 5, 6]}
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("suffixes", [("_dup", ""), ("", "_dup")])
def test_merge_for_suffix_collisions(suffixes):
    # GH#61402
    # Both frames share "col2" besides the join key, and the right frame
    # additionally carries "col2_dup" -- the very name that "col2" turns into
    # once the "_dup" suffix is appended to it. Whichever side receives that
    # suffix, the merge is expected to be rejected with a MergeError.
    left_frame = DataFrame({"col1": [1], "col2": [2]})
    right_frame = DataFrame({"col1": [1], "col2": [2], "col2_dup": [3]})

    expected_message = "duplicate columns"
    with pytest.raises(MergeError, match=expected_message):
        merge(left_frame, right_frame, on="col1", suffixes=suffixes)
Loading