Skip to content

Commit 11ebac8

Browse files
committed
Added test to check metadata handling for pandas.merge.
1 parent 58b3c2a commit 11ebac8

File tree

4 files changed

+60
-12
lines changed

4 files changed

+60
-12
lines changed

pandas/core/generic.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6096,7 +6096,10 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
60966096
Parameters
60976097
----------
60986098
other : the object from which to get the attributes that we are going
6099-
to propagate
6099+
to propagate. If ``other`` has an ``input_objs`` attribute, that attribute must
6100+
be an iterable of objects that each have an ``attrs`` attribute; every such
6101+
``attrs`` must be a dictionary, and all of these dictionaries must be equal to
6102+
one another.
61006103
method : str, optional
61016104
A passed method name providing context on where ``__finalize__``
61026105
was called.

pandas/core/reshape/merge.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1151,7 +1151,8 @@ def get_result(self) -> DataFrame:
11511151
self._maybe_restore_index_levels(result)
11521152

11531153
return result.__finalize__(
1154-
self.left, method="merge"
1154+
types.SimpleNamespace(input_objs=[self.left, self.right]),
1155+
method="merge"
11551156
)
11561157

11571158
@final

pandas/tests/generic/test_finalize.py

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,6 @@
148148
operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
149149
),
150150
(pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
151-
(pd.DataFrame, frame_data, operator.methodcaller("merge", pd.DataFrame({"A": [1]}))),
152151
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
153152
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
154153
pytest.param(
@@ -736,24 +735,68 @@ def test_merge_asof_sets_duplication_allowance_flag(allow_duplication_on_left, a
736735
expected_duplication_allowance = allow_duplication_on_left and allow_duplication_on_right
737736
assert result.flags.allows_duplicate_labels == expected_duplication_allowance
738737

739-
def test_merge_collects_metadata_from_only_its_left_input():
738+
def test_merge_propagates_metadata_from_equal_input_metadata():
740739
"""
741-
Check that pandas.merge sets the metadata of its result to a copy of the metadata from its
742-
left input.
740+
Check that pandas.merge sets the metadata of its result to a deep copy of the metadata from
741+
its left input, if the metadata from both inputs are equal.
743742
"""
744743
# Arrange
745-
left = pd.DataFrame({"test": [1]})
746744
metadata = {"a": 2}
745+
left = pd.DataFrame({"test": [1]})
747746
left.attrs = metadata
747+
right = pd.DataFrame({"test": [1]})
748+
right.attrs = metadata.copy()
748749

750+
# Act
751+
result = left.merge(right, how="inner", on="test")
752+
753+
# Assert
754+
assert result.attrs == metadata
755+
left.attrs = {"b": 3}
756+
assert result.attrs == metadata
757+
758+
def test_merge_does_not_propagate_metadata_from_unequal_input_metadata():
759+
"""
760+
Check that the metadata of the result of pandas.merge is empty if both inputs
761+
to pandas.merge have non-empty metadata that are not equal to each other.
762+
"""
763+
# Arrange
764+
left = pd.DataFrame({"test": [1]})
765+
left.attrs = {"a": 2}
749766
right = pd.DataFrame({"test": [1]})
750767
right.attrs = {"b": 3}
751768

752769
# Act
753770
result = left.merge(right, how="inner", on="test")
754771

755772
# Assert
756-
assert result.attrs == metadata
757-
# Check that the metadata from the left argument is copied, rather than shared.
758-
left.attrs = {"c": 4}
759-
assert result.attrs == metadata
773+
assert result.attrs == {}
774+
775+
no_metadata = pd.DataFrame({"test": [1]})
776+
777+
metadata = {"a": 2}
778+
has_metadata = pd.DataFrame({"test": [1]})
779+
has_metadata.attrs = metadata
780+
781+
@pytest.mark.parametrize(["left", "right", "expected"],
782+
[(no_metadata, has_metadata, metadata),
783+
(has_metadata, no_metadata, metadata),
784+
(no_metadata, no_metadata, {})])
785+
def test_merge_propagates_metadata_if_one_input_has_no_metadata(left: pd.DataFrame, right: pd.DataFrame, expected: dict):
786+
"""
787+
Check that if the metadata for one input to pandas.merge is empty, the result
788+
of merge has the same metadata as the other input.
789+
790+
(empty) (A) (A) (empty) (empty) (empty)
791+
| | | | | |
792+
--> merge <-- --> merge <-- --> merge <--
793+
| | |
794+
(A) (A) (empty)
795+
"""
796+
# Arrange
797+
798+
# Act
799+
result = left.merge(right, how="inner", on="test")
800+
801+
# Assert
802+
assert result.attrs == expected

pandas/tests/generic/test_frame.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from copy import deepcopy
22
from operator import methodcaller
3+
from typing import Literal
34

45
import numpy as np
56
import pytest
@@ -77,7 +78,7 @@ def test_metadata_propagation_indiv(self, monkeypatch):
7778
# merging with override
7879
# GH 6923
7980

80-
def finalize(self, other, method=None, **kwargs):
81+
def finalize(self: DataFrame, other: DataFrame, method: Literal["merge", "concat"] | None = None, **kwargs):
8182
for name in self._metadata:
8283
if method == "merge":
8384
left, right = other.input_objs

0 commit comments

Comments
 (0)