Skip to content

Commit 54809ed

Browse files
committed
#28283 Initial commit
1 parent 1da0d02 commit 54809ed

File tree

3 files changed

+103
-7
lines changed

3 files changed

+103
-7
lines changed

pandas/core/frame.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11110,7 +11110,7 @@ def merge(
1111011110

1111111111
from pandas.core.reshape.merge import merge
1111211112

11113-
return merge(
11113+
result = merge(
1111411114
self,
1111511115
right,
1111611116
how=how,
@@ -11124,6 +11124,8 @@ def merge(
1112411124
indicator=indicator,
1112511125
validate=validate,
1112611126
)
11127+
# ADDED: Apply __finalize__ to propagate metadata from left DataFrame
11128+
return result.__finalize__(self, method="merge")
1112711129

1112811130
def round(
1112911131
self, decimals: int | dict[IndexLabel, int] | Series = 0, *args, **kwargs
@@ -11211,6 +11213,18 @@ def round(
1121111213
1 0.0 1.0
1121211214
2 0.7 0.0
1121311215
3 0.2 0.0
11216+
11217+
>>> df1 = pd.DataFrame({"key": [1, 2], "A": [1, 2]})
11218+
>>> df2 = pd.DataFrame({"key": [1, 2], "B": [3, 4]})
11219+
>>> df1.attrs["source"] = "dataset1"
11220+
>>> result = df1.merge(df2, on="key")
11221+
>>> result.attrs["source"] # Metadata is preserved
11222+
'dataset1'
11223+
11224+
Notes
11225+
-----
11226+
The merge operation propagates metadata (attrs, flags) from the left DataFrame
11227+
to the result using the __finalize__ method.
1121411228
"""
1121511229
from pandas.core.reshape.concat import concat
1121611230

pandas/core/reshape/merge.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def merge(
370370
left._check_copy_deprecation(copy)
371371
right_df = _validate_operand(right)
372372
if how == "cross":
373-
return _cross_merge(
373+
result = _cross_merge(
374374
left_df,
375375
right_df,
376376
on=on,
@@ -398,7 +398,14 @@ def merge(
398398
indicator=indicator,
399399
validate=validate,
400400
)
401-
return op.get_result()
401+
result = op.get_result()
402+
403+
# ADDED: Apply __finalize__ to propagate metadata
404+
# Use left DataFrame as the primary source for metadata
405+
if hasattr(left, "__finalize__"):
406+
result = result.__finalize__(left, method="merge")
407+
408+
return result
402409

403410

404411
def _cross_merge(
@@ -927,7 +934,12 @@ def merge_asof(
927934
allow_exact_matches=allow_exact_matches,
928935
direction=direction,
929936
)
930-
return op.get_result()
937+
result = op.get_result()
938+
# ADDED: Apply __finalize__ to propagate metadata
939+
if hasattr(left, "__finalize__"):
940+
result = result.__finalize__(left, method="merge_asof")
941+
942+
return result
931943

932944

933945
# TODO: transformations??
@@ -1143,7 +1155,9 @@ def get_result(self) -> DataFrame:
11431155

11441156
self._maybe_restore_index_levels(result)
11451157

1146-
return result.__finalize__(self, method="merge")
1158+
# NOTE: __finalize__ is now called in the higher-level merge functions
1159+
# rather than here, to ensure it's called consistently across all entry points
1160+
return result
11471161

11481162
@final
11491163
@cache_readonly

pandas/tests/generic/test_finalize.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# TODO:
1414
# * Binary methods (mul, div, etc.)
1515
# * Binary outputs (align, etc.)
16-
# * top-level methods (concat, merge, get_dummies, etc.)
16+
# * top-level methods (concat, get_dummies, etc.)
1717
# * window
1818
# * cumulative reductions
1919

@@ -154,7 +154,7 @@
154154
frame_data,
155155
operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
156156
),
157-
marks=not_implemented_mark,
157+
# marks=not_implemented_mark,
158158
),
159159
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
160160
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
@@ -675,3 +675,71 @@ def test_finalize_frame_series_name():
675675
df = pd.DataFrame({"name": [1, 2]})
676676
result = pd.Series([1, 2]).__finalize__(df)
677677
assert result.name is None
678+
679+
680+
def test_merge_finalize():
681+
"""Test that DataFrame.merge calls __finalize__."""
682+
# Create test DataFrames
683+
df1 = pd.DataFrame({"key": [1, 2, 3], "A": [1, 2, 3]})
684+
df2 = pd.DataFrame({"key": [1, 2, 4], "B": [4, 5, 6]})
685+
686+
# Add metadata
687+
df1.attrs["source"] = "left"
688+
df1.attrs["version"] = "1.0"
689+
690+
# Test different merge types
691+
for how in ["inner", "outer", "left", "right"]:
692+
result = df1.merge(df2, on="key", how=how)
693+
694+
# Check that attrs were propagated from left DataFrame
695+
assert result.attrs["source"] == "left"
696+
assert result.attrs["version"] == "1.0"
697+
698+
699+
def test_merge_asof_finalize():
700+
"""Test that merge_asof calls __finalize__."""
701+
df1 = pd.DataFrame({"time": [1, 2, 3], "A": [1, 2, 3]})
702+
df2 = pd.DataFrame({"time": [1, 2, 4], "B": [4, 5, 6]})
703+
704+
df1.attrs["source"] = "quotes"
705+
706+
result = pd.merge_asof(df1, df2, on="time")
707+
708+
# Check that attrs were propagated
709+
assert result.attrs["source"] == "quotes"
710+
711+
712+
def test_merge_index_finalize():
713+
"""Test that index-based merge calls __finalize__."""
714+
df1 = pd.DataFrame({"A": [1, 2]}, index=[1, 2])
715+
df2 = pd.DataFrame({"B": [3, 4]}, index=[1, 2])
716+
717+
df1.attrs["index_merge"] = True
718+
719+
result = df1.merge(df2, left_index=True, right_index=True)
720+
721+
assert result.attrs["index_merge"] is True
722+
723+
724+
def test_merge_suffixes_finalize():
725+
"""Test merge with suffixes calls __finalize__."""
726+
df1 = pd.DataFrame({"key": [1, 2], "value": [1, 2]})
727+
df2 = pd.DataFrame({"key": [1, 2], "value": [3, 4]})
728+
729+
df1.attrs["has_suffixes"] = True
730+
731+
result = df1.merge(df2, on="key", suffixes=("_left", "_right"))
732+
733+
assert result.attrs["has_suffixes"] is True
734+
735+
736+
def test_merge_series_finalize():
737+
"""Test that merging with a Series calls __finalize__."""
738+
df = pd.DataFrame({"key": [1, 2, 3], "A": [1, 2, 3]})
739+
s = pd.Series([4, 5, 6], index=[1, 2, 3], name="B")
740+
741+
df.attrs["merged_with_series"] = True
742+
743+
result = df.merge(s, left_on="key", right_index=True)
744+
745+
assert result.attrs["merged_with_series"] is True

0 commit comments

Comments
 (0)