diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 81d370766ab..571e35ec105 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,6 +50,8 @@ Bug fixes By `Deepak Cherian `_. - Fix detection of the ``h5netcdf`` backend. Xarray now selects ``h5netcdf`` if the default ``netCDF4`` engine is not available (:issue:`10401`, :pull:`10557`). By `Scott Staniewicz `_. +- Fix :py:func:`merge` to prevent altering original object depending on join value (:pull:`10596`) + By `Julia Signell `_. - Ensure ``unlimited_dims`` passed to :py:meth:`xarray.DataArray.to_netcdf`, :py:meth:`xarray.Dataset.to_netcdf` or :py:meth:`xarray.DataTree.to_netcdf` only contains dimensions present in the object; raise ``ValueError`` otherwise (:issue:`10549`, :pull:`10608`). By `Kai Mühlbauer `_. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 06d7218fe7c..d754f58ce4f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -970,7 +970,6 @@ def _replace( data = copy.copy(self.data) if attrs is _default: attrs = copy.copy(self._attrs) - if encoding is _default: encoding = copy.copy(self._encoding) return type(self)(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 5ae70e4524f..58168ddb024 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -276,6 +276,7 @@ def merge_collected( if index is not None: merged_indexes[name] = index else: + attrs: dict[Any, Any] = {} indexed_elements = [ (variable, index) for variable, index in elements_list @@ -306,13 +307,7 @@ def merge_collected( [var.attrs for var, _ in indexed_elements], combine_attrs=combine_attrs, ) - if variable.attrs or attrs: - # Make a shallow copy to so that assigning merged_vars[name].attrs - # does not affect the original input variable. - merged_vars[name] = variable.copy(deep=False) - merged_vars[name].attrs = attrs - else: - merged_vars[name] = variable + merged_vars[name] = variable merged_indexes[name] = index else: variables = [variable for variable, _ in elements_list] @@ -342,10 +337,15 @@ def merge_collected( raise if name in merged_vars: - merged_vars[name].attrs = merge_attrs( + attrs = merge_attrs( [var.attrs for var in variables], combine_attrs=combine_attrs ) + if name in merged_vars and (merged_vars[name].attrs or attrs): + # Ensure that assigning attrs does not affect the original input variable. + merged_vars[name] = merged_vars[name].copy(deep=False) + merged_vars[name].attrs = attrs + return merged_vars, merged_indexes diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index b6c8338494c..8ae05fbb261 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -365,13 +365,16 @@ def test_merge(self): with pytest.raises(ValueError, match=r"should be coordinates or not"): data.merge(data.reset_coords()) - def test_merge_drop_attrs(self): + @pytest.mark.parametrize( + "join", ["outer", "inner", "left", "right", "exact", "override"] + ) + def test_merge_drop_attrs(self, join): data = create_test_data() ds1 = data[["var1"]] ds2 = data[["var3"]] ds1.coords["dim2"].attrs["keep me"] = "example" ds2.coords["numbers"].attrs["foo"] = "bar" - actual = ds1.merge(ds2, combine_attrs="drop") + actual = ds1.merge(ds2, combine_attrs="drop", join=join) assert actual.coords["dim2"].attrs == {} assert actual.coords["numbers"].attrs == {} assert ds1.coords["dim2"].attrs["keep me"] == "example"