Skip to content
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Deprecations
Bug Fixes
~~~~~~~~~

- :py:meth:`Dataset.map` now merges attrs from the function result and the original
using the ``drop_conflicts`` strategy when ``keep_attrs=True``, preserving attrs
set by the function (:issue:`11019`, :pull:`11020`).
By `Maximilian Roos <https://github.com/max-sixty>`_.
- Ensure that ``keep_attrs='drop'`` and ``keep_attrs=False`` remove attrs from result, even when there is
only one xarray object given to ``apply_ufunc`` (:issue:`10982` :pull:`10997`).
By `Julia Signell <https://github.com/jsignell>`_.
Expand Down
24 changes: 22 additions & 2 deletions xarray/computation/weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,33 @@ def _implementation(self, func, dim, **kwargs) -> DataArray:

dataset = self.obj._to_temp_dataset()
dataset = dataset.map(func, dim=dim, **kwargs)
return self.obj._from_temp_dataset(dataset)
result = self.obj._from_temp_dataset(dataset)

# Clear attrs when keep_attrs is explicitly False
# (weighted operations can propagate attrs from weights through internal computations)
if kwargs.get("keep_attrs") is False:
result.attrs = {}
for var in result.coords.values():
var.attrs = {}

return result


class DatasetWeighted(Weighted["Dataset"]):
def _implementation(self, func, dim, **kwargs) -> Dataset:
self._check_dim(dim)
return self.obj.map(func, dim=dim, **kwargs)
result = self.obj.map(func, dim=dim, **kwargs)

# Clear attrs when keep_attrs is explicitly False
# (weighted operations can propagate attrs from weights through internal computations)
if kwargs.get("keep_attrs") is False:
result.attrs = {}
for var in result.data_vars.values():
var.attrs = {}
for var in result.coords.values():
var.attrs = {}

return result


def _inject_docstring(cls, cls_name):
Expand Down
23 changes: 14 additions & 9 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6908,8 +6908,10 @@ def map(
DataArray.
keep_attrs : bool or None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
copied from the original objects to the new ones. If False, the new dataset
and variables will be returned without copying the attributes.
combined from the original objects and the function results using the
``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
are dropped. If False, the new dataset and variables will have only
the attributes set by the function.
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
Expand Down Expand Up @@ -6958,16 +6960,19 @@ def map(
coords = Coordinates._construct_direct(coords=coord_vars, indexes=indexes)

if keep_attrs:
# Merge attrs from function result and original, dropping conflicts
from xarray.structure.merge import merge_attrs

for k, v in variables.items():
v._copy_attrs_from(self.data_vars[k])
v.attrs = merge_attrs(
[v.attrs, self.data_vars[k].attrs], "drop_conflicts"
)
for k, v in coords.items():
if k in self.coords:
v._copy_attrs_from(self.coords[k])
else:
for v in variables.values():
v.attrs = {}
for v in coords.values():
v.attrs = {}
v.attrs = merge_attrs(
[v.attrs, self.coords[k].attrs], "drop_conflicts"
)
# When keep_attrs=False, leave attrs as the function returned them
Comment on lines 6960 to +6975
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with interpreting keep_attrs=False as "leave the attrs as returned by the function" is that that means we don't have keep_attrs="drop" anymore.

I'd argue that keep_attrs=True should be closer to what you're proposing for keep_attrs=False, which I do think would be more intuitive.

So instead we may need to consider supporting keep_attrs with strategy names / a strategy function, like apply_ufunc does. That would still allow you to choose "drop_conflicts" if preferred (or maybe as the default? Not sure), while not changing behavior too drastically.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, the proposed code treats keep_attrs=False as "remove all the input attrs". but not "remove all the output attrs".

@keewis can you see a reasonable change to fix the immediate issue without adding a whole strategy to keep_attrs? I don't have a particularly strong view on this specific implementation, but it does seem reasonable / logical, and it does let us solve this immediate bug...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(zooming out — as I mentioned before, for me the best "blank-slate" implementation for keep_attrs is to mostly not have a the option at all, and folks can drop attrs if they want. though I agree with you that merging is case that neither approach handles well...)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the proposed code treats keep_attrs=False as "remove all the input attrs". but not "remove all the output attrs".

keep_attrs as a boolean is always ambiguous. For instance in #10997 I arrived at the conclusion that keep_attrs=False should drop all attrs from the output.

It seems like it wouldn't be too hard to make this more configurable by just passing any string that is passed to keep_args along to merge_attrs like @keewis is suggesting. You would just map True to "override" and False to "drop" (I think that's what the behavior is now on main) and then people who want something else can use a specific string.


attrs = self.attrs if keep_attrs else None
return type(self)(variables, coords=coords, attrs=attrs)
Expand Down
13 changes: 10 additions & 3 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,10 @@ def map( # type: ignore[override]
DataArray.
keep_attrs : bool | None, optional
If True, both the dataset's and variables' attributes (`attrs`) will be
copied from the original objects to the new ones. If False, the new dataset
and variables will be returned without copying the attributes.
combined from the original objects and the function results using the
``drop_conflicts`` strategy: matching attrs are kept, conflicting attrs
are dropped. If False, the new dataset and variables will have only
the attributes set by the function.
args : iterable, optional
Positional arguments passed on to `func`.
**kwargs : Any
Expand Down Expand Up @@ -438,8 +440,13 @@ def map( # type: ignore[override]
for k, v in self.data_vars.items()
}
if keep_attrs:
# Merge attrs from function result and original, dropping conflicts
from xarray.structure.merge import merge_attrs

for k, v in variables.items():
v._copy_attrs_from(self.data_vars[k])
v.attrs = merge_attrs(
[v.attrs, self.data_vars[k].attrs], "drop_conflicts"
)
attrs = self.attrs if keep_attrs else None
# return type(self)(variables, attrs=attrs)
return Dataset(variables, attrs=attrs)
Expand Down
29 changes: 29 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6509,6 +6509,35 @@ def mixed_func(x):
expected = xr.Dataset({"foo": 42, "bar": ("y", [4, 5])})
assert_identical(result, expected)

def test_map_preserves_function_attrs(self) -> None:
# Regression test for GH11019
# Attrs added by function should be preserved in result
ds = xr.Dataset({"test": ("x", [1, 2, 3], {"original": "value"})})

def add_attr(da):
return da.assign_attrs(new_attr="foobar")

# With keep_attrs=True: merge using drop_conflicts (no conflict here)
result = ds.map(add_attr, keep_attrs=True)
assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}

# With keep_attrs=False: function's attrs preserved
result = ds.map(add_attr, keep_attrs=False)
assert result["test"].attrs == {"original": "value", "new_attr": "foobar"}

# When function modifies existing attr with keep_attrs=True, conflict is dropped
def modify_attr(da):
return da.assign_attrs(original="modified", extra="added")

result = ds.map(modify_attr, keep_attrs=True)
assert result["test"].attrs == {
"extra": "added"
} # "original" dropped due to conflict

# When function modifies existing attr with keep_attrs=False, function wins
result = ds.map(modify_attr, keep_attrs=False)
assert result["test"].attrs == {"original": "modified", "extra": "added"}

def test_apply_pending_deprecated_map(self) -> None:
data = create_test_data()
data.attrs["foo"] = "bar"
Expand Down
18 changes: 18 additions & 0 deletions xarray/tests/test_weighted.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,24 @@ def test_weighted_mean_keep_attrs_ds():
assert data.coords["dim_1"].attrs == result.coords["dim_1"].attrs


@pytest.mark.parametrize("as_dataset", (True, False))
def test_weighted_operations_drop_coord_attrs(as_dataset):
# Test that coord attrs are cleared when keep_attrs=False
weights = DataArray(np.random.randn(2))
ds = Dataset(
{"a": (["dim_0", "dim_1"], np.random.randn(2, 2), {"attr": "data"})},
coords={"dim_1": ("dim_1", ["a", "b"], {"coord_attr": "value"})},
)

data: DataArray | Dataset = ds if as_dataset else ds["a"]

result = data.weighted(weights).mean(dim="dim_0", keep_attrs=False)

# All attrs should be cleared
assert result.attrs == {}
assert result.coords["dim_1"].attrs == {}


@pytest.mark.parametrize("operation", ("sum_of_weights", "sum", "mean", "quantile"))
@pytest.mark.parametrize("as_dataset", (True, False))
def test_weighted_bad_dim(operation, as_dataset):
Expand Down
Loading