pandas-dev · rhshadrach · Oct 18, 2025 · Sep 4, 2025 · Sep 4, 2025 · Sep 4, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1094,6 +1094,7 @@ Reshaping
 - Bug in :meth:`DataFrame.unstack` producing incorrect results when manipulating empty :class:`DataFrame` with an :class:`ExtentionDtype` (:issue:`59123`)
 - Bug in :meth:`concat` where concatenating DataFrame and Series with ``ignore_index = True`` drops the series name (:issue:`60723`, :issue:`56257`)
 - Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
+- Bug in :meth:`DataFrame.merge` where the result of a merge did not contain any metadata or flag information from the inputs to the merge (:issue:`28283`)
 - Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
 - Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
 - Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6096,10 +6096,16 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
         """
         Propagate metadata from other to self.
 
+        This is the default implementation. Subclasses may override this method to
+        implement their own metadata handling.
+
         Parameters
         ----------
         other : the object from which to get the attributes that we are going
-            to propagate
+            to propagate. If ``other`` has an ``input_objs`` attribute, it
+            must be an iterable of objects that each have an ``attrs``
+            dictionary; ``attrs`` are propagated only when they are non-empty
+            and equal across all of those objects.
         method : str, optional
             A passed method name providing context on where ``__finalize__``
             was called.

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1129,12 +1129,17 @@ def _reindex_and_concat(
         return result
 
     def get_result(self) -> DataFrame:
+        """
+        Execute the merge.
+        """
         if self.indicator:
             self.left, self.right = self._indicator_pre_merge(self.left, self.right)
 
         join_index, left_indexer, right_indexer = self._get_join_info()
 
         result = self._reindex_and_concat(join_index, left_indexer, right_indexer)
+
+        # TODO: is this __finalize__ call needed? It is called again before returning.
         result = result.__finalize__(
             types.SimpleNamespace(input_objs=[self.left, self.right]),
             method=self._merge_type,
@@ -1147,6 +1152,8 @@ def get_result(self) -> DataFrame:
 
         self._maybe_restore_index_levels(result)
 
+        # __finalize__ is responsible for copying the metadata from the merge
+        # inputs to the result.
         return result.__finalize__(
             types.SimpleNamespace(input_objs=[self.left, self.right]), method="merge"
         )
@@ -1167,6 +1174,14 @@ def _indicator_name(self) -> str | None:
     def _indicator_pre_merge(
         self, left: DataFrame, right: DataFrame
     ) -> tuple[DataFrame, DataFrame]:
+        """
+        Add one indicator column to each of the left and right inputs to a
+        merge operation.
+
+        These columns are used to produce another column in the output of the
+        merge, indicating for each row of the output whether it was produced
+        using the left, right or both inputs.
+        """
         columns = left.columns.union(right.columns)
 
         for i in ["_left_indicator", "_right_indicator"]:
@@ -1193,6 +1208,12 @@ def _indicator_pre_merge(
 
     @final
     def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
+        """
+        Add an indicator column to the merge result.
+
+        This column indicates for each row of the output whether it was produced using
+        the left, right or both inputs.
+        """
         result["_left_indicator"] = result["_left_indicator"].fillna(0)
         result["_right_indicator"] = result["_right_indicator"].fillna(0)
 

diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py
@@ -1,13 +1,13 @@
-"""
-An exhaustive list of pandas methods exercising NDFrame.__finalize__.
-"""
+"""An exhaustive list of pandas methods exercising NDFrame.__finalize__."""
 
 import operator
 import re
 
 import numpy as np
 import pytest
 
+from pandas._typing import MergeHow
+
 import pandas as pd
 
 # TODO:
@@ -148,14 +148,6 @@
         operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
     ),
     (pd.DataFrame, frame_data, operator.methodcaller("map", lambda x: x)),
-    pytest.param(
-        (
-            pd.DataFrame,
-            frame_data,
-            operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
-        ),
-        marks=not_implemented_mark,
-    ),
     (pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
     (pd.DataFrame, frame_data, operator.methodcaller("corr")),
     pytest.param(
@@ -371,8 +363,7 @@ def idfn(x):
     m = xpr.search(str(x))
     if m:
         return m.group(1)
-    else:
-        return str(x)
+    return str(x)
 
 
 @pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1]))
@@ -586,7 +577,8 @@ def test_datetime_property(attr):
 
 
 @pytest.mark.parametrize(
-    "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
+    "attr",
+    ["days", "seconds", "microseconds", "nanoseconds", "components"],
 )
 def test_timedelta_property(attr):
     s = pd.Series(pd.timedelta_range("2000", periods=4))
@@ -630,7 +622,8 @@ def test_categorical_accessor(method):
 
 
 @pytest.mark.parametrize(
-    "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
+    "obj",
+    [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})],
 )
 @pytest.mark.parametrize(
     "method",
@@ -649,7 +642,8 @@ def test_groupby_finalize(obj, method):
 
 
 @pytest.mark.parametrize(
-    "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
+    "obj",
+    [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})],
 )
 @pytest.mark.parametrize(
     "method",
@@ -675,3 +669,154 @@ def test_finalize_frame_series_name():
     df = pd.DataFrame({"name": [1, 2]})
     result = pd.Series([1, 2]).__finalize__(df)
     assert result.name is None
+
+
+# ----------------------------------------------------------------------------
+# Tests for merge
+
+
+@pytest.mark.parametrize(
+    ["allow_on_left", "allow_on_right"],
+    [(False, False), (False, True), (True, False), (True, True)],
+)
+@pytest.mark.parametrize(
+    "how",
+    [
+        "left",
+        "right",
+        "inner",
+        "outer",
+        "left_anti",
+        "right_anti",
+        "cross",
+    ],
+)
+def test_merge_sets_duplication_allowance_flag(
+    how: MergeHow,
+    allow_on_left: bool,
+    allow_on_right: bool,
+):
+    """Check that DataFrame.merge correctly sets the allows_duplicate_labels flag
+    on its result.
+
+    The flag on the result should be True if and only if both arguments to
+    merge have their flags set to True.
+    """
+    # Arrange
+    left = pd.DataFrame({"test": [1]}).set_flags(allows_duplicate_labels=allow_on_left)
+    right = pd.DataFrame({"test": [1]}).set_flags(
+        allows_duplicate_labels=allow_on_right,
+    )
+
+    # Act
+    if not how == "cross":
+        result = left.merge(right, how=how, on="test")
+    else:
+        result = left.merge(right, how=how)
+
+    # Assert
+    expected_duplication_allowance = allow_on_left and allow_on_right
+    assert result.flags.allows_duplicate_labels == expected_duplication_allowance
+
+
+@pytest.mark.parametrize(
+    ["allow_on_left", "allow_on_right"],
+    [(False, False), (False, True), (True, False), (True, True)],
+)
+def test_merge_asof_sets_duplication_allowance_flag(
+    allow_on_left: bool,
+    allow_on_right: bool,
+):
+    """Check that pandas.merge_asof correctly sets the allows_duplicate_labels flag
+    on its result.
+
+    The flag on the result should be True if and only if both arguments to
+    merge_asof have their flags set to True.
+    """
+    # Arrange
+    left = pd.DataFrame({"test": [1]}).set_flags(allows_duplicate_labels=allow_on_left)
+    right = pd.DataFrame({"test": [1]}).set_flags(
+        allows_duplicate_labels=allow_on_right,
+    )
+
+    # Act
+    result = pd.merge_asof(left, right)
+
+    # Assert
+    expected_duplication_allowance = allow_on_left and allow_on_right
+    assert result.flags.allows_duplicate_labels == expected_duplication_allowance
+
+
+def test_merge_propagates_metadata_from_equal_input_metadata():
+    """Check that pandas.merge sets the metadata of its result to a deep copy of
+    the metadata from its left input, if the metadata from both inputs are equal.
+    """
+    # Arrange
+    metadata = {"a": 2}
+    left = pd.DataFrame({"test": [1]})
+    left.attrs = metadata
+    right = pd.DataFrame({"test": [1]})
+    right.attrs = metadata.copy()
+
+    # Act
+    result = left.merge(right, how="inner", on="test")
+
+    # Assert
+    assert result.attrs == metadata
+    left.attrs = {"b": 3}
+    assert result.attrs == metadata
+
+
+def test_merge_does_not_propagate_metadata_from_unequal_input_metadata():
+    """Check that the metadata for the result of pandas.merge is empty if the
+    metadata for both inputs to pandas.merge are not equal.
+    """
+    # Arrange
+    left = pd.DataFrame({"test": [1]})
+    left.attrs = {"a": 2}
+    right = pd.DataFrame({"test": [1]})
+    right.attrs = {"b": 3}
+
+    # Act
+    result = left.merge(right, how="inner", on="test")
+
+    # Assert
+    assert result.attrs == {}
+
+
+no_metadata = pd.DataFrame({"test": [1]})
+
+has_metadata = pd.DataFrame({"test": [1]})
+has_metadata.attrs = {"a": 2}
+
+
+@pytest.mark.parametrize(
+    ["left", "right", "expected"],
+    [
+        (no_metadata, has_metadata, {}),
+        (has_metadata, no_metadata, {}),
+        (no_metadata, no_metadata, {}),
+    ],
+    ids=["left-empty", "right-empty", "both-empty"],
+)
+def test_merge_does_not_propagate_metadata_if_one_input_has_no_metadata(
+    left: pd.DataFrame,
+    right: pd.DataFrame,
+    expected: dict,
+):
+    """Check that if the metadata for either input to pandas.merge is empty, the
+    metadata of the merge result is empty as well.
+
+    (empty)         (A)      (A)         (empty)    (empty)       (empty)
+       |             |        |             |          |             |
+        --> merge <--          --> merge <--            --> merge <--
+              |                      |                        |
+           (empty)                (empty)                  (empty)
+    """
+    # Arrange
+
+    # Act
+    result = left.merge(right, how="inner", on="test")
+
+    # Assert
+    assert result.attrs == expected
diff --git a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py
@@ -1,5 +1,6 @@
 from copy import deepcopy
 from operator import methodcaller
+from typing import Literal
 
 import numpy as np
 import pytest
@@ -77,7 +78,12 @@ def test_metadata_propagation_indiv(self, monkeypatch):
         # merging with override
         # GH 6923
 
-        def finalize(self, other, method=None, **kwargs):
+        def finalize(
+            self: DataFrame,
+            other: DataFrame,
+            method: Literal["merge", "concat"] | None = None,
+            **kwargs,
+        ):
             for name in self._metadata:
                 if method == "merge":
                     left, right = other.input_objs