Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
742a268
Activated test for metadata of merge operation.
aijams Sep 4, 2025
a12bdbd
Fixed error in test for merge result flags.
aijams Sep 4, 2025
d22b90a
Tested that merge collects metadata from only its left argument.
aijams Sep 4, 2025
41b3571
Added test to check whether merge_asof correctly sets the allow dupli…
aijams Sep 4, 2025
58b3c2a
Added bug fix description to documentation.
aijams Sep 5, 2025
11ebac8
Added test to check metadata handling for pandas.merge.
aijams Sep 9, 2025
24f4c8d
Added identifiers to test cases for pandas.merge.
aijams Sep 9, 2025
4381364
Modified tests for __finalize__ to respect documented merge behavior.
aijams Sep 11, 2025
bba9a13
Added type annotations to test method parameters.
aijams Sep 17, 2025
da1b0f4
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Sep 17, 2025
0d52fff
Fixed type issue for test_finalize. Added a little documentation.
aijams Sep 18, 2025
9c7b9ed
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Sep 18, 2025
9f68134
Ran ruff formatter to correct some issues.
aijams Sep 18, 2025
ff1aba5
Fixed some cosmetic issues with pre-commit hooks.
aijams Sep 19, 2025
8ece859
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Sep 19, 2025
6d216fe
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Oct 6, 2025
9b51d3e
Removed docstrings on tests.
aijams Oct 6, 2025
f2abf1f
Resolved several issues in test_finalize.
aijams Oct 7, 2025
eca7671
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Oct 7, 2025
dddc031
Fixed a couple nitpicks.
aijams Oct 7, 2025
7304a48
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Oct 8, 2025
15adcd7
Reformatted argument lists as required by ruff.
aijams Oct 8, 2025
1a8602d
Added note and removed potentially confusing docs from __finalize__.
aijams Oct 8, 2025
c5f31ac
Merge remote-tracking branch 'upstream/main' into aijams-dataframe-me…
aijams Oct 9, 2025
d9b52f0
Removed trailing commas.
aijams Oct 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6102,10 +6102,15 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
"""
Propagate metadata from other to self.

This is the default implementation. Subclasses may override this method to
implement their own metadata handling.

Parameters
----------
other : the object from which to get the attributes that we are going
to propagate
to propagate. If ``other`` has an ``input_objs`` attribute, then
this attribute must contain an iterable of objects, each with an
``attrs`` attribute.
method : str, optional
A passed method name providing context on where ``__finalize__``
was called.
Expand All @@ -6114,6 +6119,12 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:

The values passed as `method` are not currently considered
stable across pandas releases.

Notes
-----
In case ``other`` has an ``input_objs`` attribute, this method only
propagates its metadata if each object in ``input_objs`` has the exact
same metadata as the others.
"""
if isinstance(other, NDFrame):
if other.attrs:
Expand Down
20 changes: 16 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,16 +1129,15 @@ def _reindex_and_concat(
return result

def get_result(self) -> DataFrame:
"""
Execute the merge.
"""
if self.indicator:
self.left, self.right = self._indicator_pre_merge(self.left, self.right)

join_index, left_indexer, right_indexer = self._get_join_info()

result = self._reindex_and_concat(join_index, left_indexer, right_indexer)
result = result.__finalize__(
types.SimpleNamespace(input_objs=[self.left, self.right]),
method=self._merge_type,
)

if self.indicator:
result = self._indicator_post_merge(result)
Expand Down Expand Up @@ -1167,6 +1166,13 @@ def _indicator_name(self) -> str | None:
def _indicator_pre_merge(
self, left: DataFrame, right: DataFrame
) -> tuple[DataFrame, DataFrame]:
"""
Add one indicator column to each of the left and right inputs.

These columns are used to produce another column in the output of the
merge, indicating for each row of the output whether it was produced
using the left, right or both inputs.
"""
columns = left.columns.union(right.columns)

for i in ["_left_indicator", "_right_indicator"]:
Expand All @@ -1193,6 +1199,12 @@ def _indicator_pre_merge(

@final
def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
"""
Add an indicator column to the merge result.

This column indicates for each row of the output whether it was produced using
the left, right or both inputs.
"""
result["_left_indicator"] = result["_left_indicator"].fillna(0)
result["_right_indicator"] = result["_right_indicator"].fillna(0)

Expand Down
130 changes: 122 additions & 8 deletions pandas/tests/generic/test_finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
An exhaustive list of pandas methods exercising NDFrame.__finalize__.
"""

from copy import deepcopy
import operator
import re

import numpy as np
import pytest

from pandas._typing import MergeHow

import pandas as pd

# TODO:
Expand Down Expand Up @@ -148,14 +151,6 @@
operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
),
(pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
pytest.param(
(
pd.DataFrame,
frame_data,
operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
),
marks=not_implemented_mark,
),
(pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
(pd.DataFrame, frame_data, operator.methodcaller("corr")),
pytest.param(
Expand Down Expand Up @@ -675,3 +670,122 @@ def test_finalize_frame_series_name():
df = pd.DataFrame({"name": [1, 2]})
result = pd.Series([1, 2]).__finalize__(df)
assert result.name is None


# ----------------------------------------------------------------------------
# Merge


@pytest.mark.parametrize(
    ["allow_on_left", "allow_on_right"],
    [(False, False), (False, True), (True, False), (True, True)],
)
@pytest.mark.parametrize(
    "how",
    ["left", "right", "inner", "outer", "left_anti", "right_anti", "cross"],
)
def test_merge_correctly_sets_duplication_allowance_flag(
    how: MergeHow,
    allow_on_left: bool,
    allow_on_right: bool,
):
    # Two single-row frames sharing the "test" column; only their
    # ``allows_duplicate_labels`` flags vary between parametrizations.
    lhs = pd.DataFrame({"test": [1]}).set_flags(
        allows_duplicate_labels=allow_on_left
    )
    rhs = pd.DataFrame({"test": [1]}).set_flags(
        allows_duplicate_labels=allow_on_right
    )

    # ``on`` is omitted for a cross merge, which is performed without join keys.
    merged = (
        lhs.merge(rhs, how=how)
        if how == "cross"
        else lhs.merge(rhs, how=how, on="test")
    )

    # The result may allow duplicate labels only when both inputs do.
    assert merged.flags.allows_duplicate_labels == (allow_on_left and allow_on_right)


@pytest.mark.parametrize(
    ["allow_on_left", "allow_on_right"],
    [(False, False), (False, True), (True, False), (True, True)],
)
def test_merge_asof_correctly_sets_duplication_allowance_flag(
    allow_on_left: bool,
    allow_on_right: bool,
):
    # Build the left and right inputs from the same single-row data, differing
    # only in their ``allows_duplicate_labels`` flag.
    lhs, rhs = (
        pd.DataFrame({"test": [1]}).set_flags(allows_duplicate_labels=allowed)
        for allowed in (allow_on_left, allow_on_right)
    )

    merged = pd.merge_asof(lhs, rhs)

    # The result may allow duplicate labels only when both inputs do.
    assert merged.flags.allows_duplicate_labels == (allow_on_left and allow_on_right)


def test_merge_propagates_metadata_from_equal_input_metadata():
metadata = {"a": [1, 2]}
left = pd.DataFrame({"test": [1]})
left.attrs = metadata
right = pd.DataFrame({"test": [1]})
right.attrs = deepcopy(metadata)

result = left.merge(right, how="inner", on="test")

assert result.attrs == metadata

# Verify that merge deep-copies the attr dictionary.
assert result.attrs is not left.attrs
assert result.attrs is not right.attrs
assert result.attrs["a"] is not left.attrs["a"]
assert result.attrs["a"] is not right.attrs["a"]


def test_merge_does_not_propagate_metadata_from_unequal_input_metadata():
    lhs = pd.DataFrame({"test": [1]})
    rhs = pd.DataFrame({"test": [1]})

    # Give the two inputs metadata that disagrees with each other.
    lhs.attrs = {"a": 2}
    rhs.attrs = {"b": 3}

    merged = lhs.merge(rhs, how="inner", on="test")

    # Conflicting input metadata must leave the result without any attrs.
    assert merged.attrs == {}


@pytest.mark.parametrize(
    ["left_has_metadata", "right_has_metadata", "expected"],
    [
        (False, True, {}),
        (True, False, {}),
        (False, False, {}),
    ],
    ids=["left-empty", "right-empty", "both-empty"],
)
def test_merge_does_not_propagate_metadata_if_one_input_has_no_metadata(
    left_has_metadata: bool,
    right_has_metadata: bool,
    expected: dict,
):
    lhs = pd.DataFrame({"test": [1]})
    rhs = pd.DataFrame({"test": [1]})

    # Each side either carries the sample metadata or an empty attrs dict.
    lhs.attrs = {"a": [1, 2]} if left_has_metadata else {}
    rhs.attrs = {"a": [1, 2]} if right_has_metadata else {}

    merged = lhs.merge(rhs, how="inner", on="test")

    assert merged.attrs == expected
8 changes: 7 additions & 1 deletion pandas/tests/generic/test_frame.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from copy import deepcopy
from operator import methodcaller
from typing import Literal

import numpy as np
import pytest
Expand Down Expand Up @@ -77,7 +78,12 @@ def test_metadata_propagation_indiv(self, monkeypatch):
# merging with override
# GH 6923

def finalize(self, other, method=None, **kwargs):
def finalize(
self: DataFrame,
other: DataFrame,
method: Literal["merge", "concat"] | None = None,
**kwargs,
):
for name in self._metadata:
if method == "merge":
left, right = other.input_objs
Expand Down
Loading