From e99fc360545f917066f70659b7b742ee9df63a90 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 1 Aug 2025 10:40:40 -0400 Subject: [PATCH 1/4] Demonstrate how merge can alter original object depending on join value --- xarray/tests/test_merge.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index b6c8338494c..8ae05fbb261 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -365,13 +365,16 @@ def test_merge(self): with pytest.raises(ValueError, match=r"should be coordinates or not"): data.merge(data.reset_coords()) - def test_merge_drop_attrs(self): + @pytest.mark.parametrize( + "join", ["outer", "inner", "left", "right", "exact", "override"] + ) + def test_merge_drop_attrs(self, join): data = create_test_data() ds1 = data[["var1"]] ds2 = data[["var3"]] ds1.coords["dim2"].attrs["keep me"] = "example" ds2.coords["numbers"].attrs["foo"] = "bar" - actual = ds1.merge(ds2, combine_attrs="drop") + actual = ds1.merge(ds2, combine_attrs="drop", join=join) assert actual.coords["dim2"].attrs == {} assert actual.coords["numbers"].attrs == {} assert ds1.coords["dim2"].attrs["keep me"] == "example" From bae8626d7fcd4bfa03f11991e23a0feccd3d2aaa Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 1 Aug 2025 14:49:24 -0400 Subject: [PATCH 2/4] Make variable copy logic agnostic to join value --- xarray/core/variable.py | 1 - xarray/structure/merge.py | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 06d7218fe7c..d754f58ce4f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -970,7 +970,6 @@ def _replace( data = copy.copy(self.data) if attrs is _default: attrs = copy.copy(self._attrs) - if encoding is _default: encoding = copy.copy(self._encoding) return type(self)(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 5ae70e4524f..8b4efe65552 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -276,6 +276,7 @@ def merge_collected( if index is not None: merged_indexes[name] = index else: + attrs = None indexed_elements = [ (variable, index) for variable, index in elements_list @@ -306,13 +307,7 @@ def merge_collected( [var.attrs for var, _ in indexed_elements], combine_attrs=combine_attrs, ) - if variable.attrs or attrs: - # Make a shallow copy to so that assigning merged_vars[name].attrs - # does not affect the original input variable. - merged_vars[name] = variable.copy(deep=False) - merged_vars[name].attrs = attrs - else: - merged_vars[name] = variable + merged_vars[name] = variable merged_indexes[name] = index else: variables = [variable for variable, _ in elements_list] @@ -342,10 +337,15 @@ def merge_collected( raise if name in merged_vars: - merged_vars[name].attrs = merge_attrs( + attrs = merge_attrs( [var.attrs for var in variables], combine_attrs=combine_attrs ) + if name in merged_vars and (merged_vars[name].attrs or attrs): + # Ensure that assigning attrs does not affect the original input variable. + merged_vars[name] = merged_vars[name].copy(deep=False) + merged_vars[name].attrs = attrs + return merged_vars, merged_indexes From c4255f5e102a0c710143da4049212d65bf96f4ec Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 1 Aug 2025 15:12:44 -0400 Subject: [PATCH 3/4] Make attrs an empty dict to start --- xarray/structure/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 8b4efe65552..58168ddb024 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -276,7 +276,7 @@ def merge_collected( if index is not None: merged_indexes[name] = index else: - attrs = None + attrs: dict[Any, Any] = {} indexed_elements = [ (variable, index) for variable, index in elements_list From b79a10b90c2ea59d5b0d32797490498329eac3a6 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 4 Aug 2025 10:19:20 -0400 Subject: [PATCH 4/4] Update what's new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 06a3c2cb22d..7848bce9e8c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,6 +46,8 @@ Bug fixes By `Deepak Cherian `_. - Fix detection of the ``h5netcdf`` backend. Xarray now selects ``h5netcdf`` if the default ``netCDF4`` engine is not available (:issue:`10401`, :pull:`10557`). By `Scott Staniewicz `_. +- Fix ``ds.merge`` to prevent altering original object depending on join value (:pull:`10596`) + By `Julia Signell `_. Documentation ~~~~~~~~~~~~~