Skip to content

Issue #28283, Finalize coverage for DataFrame.merge #61701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11110,7 +11110,7 @@ def merge(

from pandas.core.reshape.merge import merge

return merge(
result = merge(
self,
right,
how=how,
Expand All @@ -11124,6 +11124,8 @@ def merge(
indicator=indicator,
validate=validate,
)
# ADDED: Apply __finalize__ to propagate metadata from left DataFrame
return result.__finalize__(self, method="merge")

def round(
self, decimals: int | dict[IndexLabel, int] | Series = 0, *args, **kwargs
Expand Down Expand Up @@ -11211,6 +11213,18 @@ def round(
1 0.0 1.0
2 0.7 0.0
3 0.2 0.0

>>> df1 = pd.DataFrame({"key": [1, 2], "A": [1, 2]})
>>> df2 = pd.DataFrame({"key": [1, 2], "B": [3, 4]})
>>> df1.attrs["source"] = "dataset1"
>>> result = df1.merge(df2, on="key")
>>> result.attrs["source"] # Metadata is preserved
'dataset1'

Notes
-----
The merge operation propagates metadata (``attrs``, ``flags``) from the left
DataFrame to the result using the ``__finalize__`` method.
"""
from pandas.core.reshape.concat import concat

Expand Down
22 changes: 18 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def merge(
left._check_copy_deprecation(copy)
right_df = _validate_operand(right)
if how == "cross":
return _cross_merge(
result = _cross_merge(
left_df,
right_df,
on=on,
Expand Down Expand Up @@ -398,7 +398,14 @@ def merge(
indicator=indicator,
validate=validate,
)
return op.get_result()
result = op.get_result()

# ADDED: Apply __finalize__ to propagate metadata
# Use left DataFrame as the primary source for metadata
if hasattr(left, "__finalize__"):
result = result.__finalize__(left, method="merge")

return result


def _cross_merge(
Expand Down Expand Up @@ -927,7 +934,12 @@ def merge_asof(
allow_exact_matches=allow_exact_matches,
direction=direction,
)
return op.get_result()
result = op.get_result()
# ADDED: Apply __finalize__ to propagate metadata
if hasattr(left, "__finalize__"):
result = result.__finalize__(left, method="merge_asof")

return result


# TODO: transformations??
Expand Down Expand Up @@ -1143,7 +1155,9 @@ def get_result(self) -> DataFrame:

self._maybe_restore_index_levels(result)

return result.__finalize__(self, method="merge")
# NOTE: __finalize__ is now called in the higher-level merge functions
# rather than here, to ensure it's called consistently across all entry points
return result

@final
@cache_readonly
Expand Down
72 changes: 70 additions & 2 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# TODO:
# * Binary methods (mul, div, etc.)
# * Binary outputs (align, etc.)
# * top-level methods (concat, merge, get_dummies, etc.)
# * top-level methods (concat, get_dummies, etc.)
# * window
# * cumulative reductions

Expand Down Expand Up @@ -154,7 +154,7 @@
frame_data,
operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
),
marks=not_implemented_mark,
# marks=not_implemented_mark,
),
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
Expand Down Expand Up @@ -675,3 +675,71 @@ def test_finalize_frame_series_name():
df = pd.DataFrame({"name": [1, 2]})
result = pd.Series([1, 2]).__finalize__(df)
assert result.name is None


def test_merge_finalize():
    """Check that ``DataFrame.merge`` carries the left frame's ``attrs`` over.

    The check is repeated for the inner, outer, left and right join types.
    """
    left = pd.DataFrame({"key": [1, 2, 3], "A": [1, 2, 3]})
    right = pd.DataFrame({"key": [1, 2, 4], "B": [4, 5, 6]})

    # Metadata is set on the left operand only.
    left.attrs["source"] = "left"
    left.attrs["version"] = "1.0"

    for join_type in ("inner", "outer", "left", "right"):
        merged_attrs = left.merge(right, on="key", how=join_type).attrs

        # Both entries must appear on the merged frame for every join type.
        assert merged_attrs["source"] == "left"
        assert merged_attrs["version"] == "1.0"


def test_merge_asof_finalize():
    """Check that ``pd.merge_asof`` carries the left frame's ``attrs`` over."""
    quotes = pd.DataFrame({"time": [1, 2, 3], "A": [1, 2, 3]})
    other = pd.DataFrame({"time": [1, 2, 4], "B": [4, 5, 6]})

    # Metadata is set on the left operand only.
    quotes.attrs["source"] = "quotes"

    merged = pd.merge_asof(quotes, other, on="time")

    assert merged.attrs["source"] == "quotes"


def test_merge_index_finalize():
    """Check that an index-on-index merge carries the left ``attrs`` over."""
    left = pd.DataFrame({"A": [1, 2]}, index=[1, 2])
    right = pd.DataFrame({"B": [3, 4]}, index=[1, 2])

    # Metadata is set on the left operand only.
    left.attrs["index_merge"] = True

    merged = left.merge(right, left_index=True, right_index=True)

    # Identity check: the stored value must be the bool True itself.
    assert merged.attrs["index_merge"] is True


def test_merge_suffixes_finalize():
    """Check that a merge using explicit suffixes carries the left ``attrs`` over."""
    # Both frames share the non-key column "value", so the suffixes are applied.
    left = pd.DataFrame({"key": [1, 2], "value": [1, 2]})
    right = pd.DataFrame({"key": [1, 2], "value": [3, 4]})

    # Metadata is set on the left operand only.
    left.attrs["has_suffixes"] = True

    merged = left.merge(right, on="key", suffixes=("_left", "_right"))

    # Identity check: the stored value must be the bool True itself.
    assert merged.attrs["has_suffixes"] is True


def test_merge_series_finalize():
    """Check that merging a named Series carries the frame's ``attrs`` over."""
    frame = pd.DataFrame({"key": [1, 2, 3], "A": [1, 2, 3]})
    named_series = pd.Series([4, 5, 6], index=[1, 2, 3], name="B")

    # Metadata is set on the DataFrame (the left operand) only.
    frame.attrs["merged_with_series"] = True

    # Join the frame's "key" column against the Series index.
    merged = frame.merge(named_series, left_on="key", right_index=True)

    # Identity check: the stored value must be the bool True itself.
    assert merged.attrs["merged_with_series"] is True
Loading