diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5cb879620cb..85da88f5339 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1463,7 +1463,7 @@ def open_mfdataset( "netcdf4" over "h5netcdf" over "scipy" (customizable via ``netcdf_engine_order`` in ``xarray.set_options()``). A custom backend class (a subclass of ``BackendEntrypoint``) can also be used. - data_vars : {"minimal", "different", "all"} or list of str, default: "all" + data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. @@ -1473,9 +1473,12 @@ class (a subclass of ``BackendEntrypoint``) can also be used. load the data payload of data variables into memory if they are not already loaded. * "all": All data variables will be concatenated. + * None: Means ``"all"`` if ``concat_dim`` is not present in any of + the ``objs``, and ``"minimal"`` if ``concat_dim`` is present + in any of ``objs``. * list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. - coords : {"minimal", "different", "all"} or list of str, optional + coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: * "minimal": Only coordinates in which the dimension already appears are included. diff --git a/xarray/structure/combine.py b/xarray/structure/combine.py index b5546e6daf0..c29e6aa1dbf 100644 --- a/xarray/structure/combine.py +++ b/xarray/structure/combine.py @@ -509,7 +509,7 @@ def combine_nested( Must be the same length as the depth of the list passed to ``datasets``. 
compat : {"identical", "equals", "broadcast_equals", \ - "no_conflicts", "override"}, optional + "no_conflicts", "override"}, default: "no_conflicts" String indicating how to compare variables of the same name for potential merge conflicts: @@ -522,7 +522,7 @@ def combine_nested( must be equal. The returned dataset then contains the combination of all non-null values. - "override": skip comparing and pick variable from first dataset - data_vars : {"minimal", "different", "all" or list of str}, optional + data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. @@ -532,15 +532,16 @@ def combine_nested( load the data payload of data variables into memory if they are not already loaded. * "all": All data variables will be concatenated. - * None: Means ``"all"`` if ``dim`` is not present in any of the ``objs``, - and ``"minimal"`` if ``dim`` is present in any of ``objs``. + * None: Means ``"all"`` if ``concat_dim`` is not present in any of + the ``datasets``, and ``"minimal"`` if ``concat_dim`` is present + in any of ``datasets``. * list of dims: The listed data variables will be concatenated, in addition to the "minimal" data variables. - coords : {"minimal", "different", "all" or list of str}, optional + coords : {"minimal", "different", "all"} or list of str, default: "different" These coordinate variables will be concatenated together: - * "minimal": Only coordinates in which the dimension already appears - are included. If concatenating over a dimension _not_ + * "minimal": Only coordinates in which the dimension already + appears are included. If concatenating over a dimension _not_ present in any of the objects, then all data variables will be concatenated along that new dimension. 
* "different": Coordinates which are not equal (ignoring attributes) @@ -557,7 +558,7 @@ def combine_nested( Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. - join : {"outer", "inner", "left", "right", "exact"}, optional + join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes (excluding concat_dim) in objects @@ -836,7 +837,8 @@ def combine_by_coords( data_objects : Iterable of Datasets or DataArrays Data objects to combine. - compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional + compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, \ + default: "no_conflicts" String indicating how to compare variables of the same name for potential conflicts: @@ -850,9 +852,8 @@ def combine_by_coords( of all non-null values. - "override": skip comparing and pick variable from first dataset - data_vars : {"minimal", "different", "all" or list of str}, optional + data_vars : {"minimal", "different", "all", None} or list of str, default: "all" These data variables will be concatenated together: - - "minimal": Only data variables in which the dimension already appears are included. - "different": Data variables which are not equal (ignoring @@ -861,18 +862,32 @@ def combine_by_coords( load the data payload of data variables into memory if they are not already loaded. - "all": All data variables will be concatenated. + - None: Means ``"all"`` if the concatenation dimension is not present in + any of ``data_objects``, and ``"minimal"`` if it is present in any + of them. - list of str: The listed data variables will be concatenated, in addition to the "minimal" data variables. - - If objects are DataArrays, `data_vars` must be "all". - coords : {"minimal", "different", "all"} or list of str, optional - As per the "data_vars" kwarg, but for coordinate variables. 
+ coords : {"minimal", "different", "all"} or list of str, default: "different" + These coordinate variables will be concatenated together: + - "minimal": Only coordinates in which the dimension already + appears are included. If concatenating over a dimension _not_ + present in any of the objects, then all data variables will + be concatenated along that new dimension. + - "different": Coordinates which are not equal (ignoring attributes) + across all datasets are also concatenated (as well as all for which + dimension already appears). Beware: this option may load the data + payload of coordinate variables into memory if they are not already + loaded. + - "all": All coordinate variables will be concatenated, except + those corresponding to other dimensions. + - list of str: The listed coordinate variables will be concatenated, + in addition to the "minimal" coordinates. fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. If None, raises a ValueError if the passed Datasets do not create a complete hypercube. - join : {"outer", "inner", "left", "right", "exact"}, optional + join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes in objects - "outer": use the union of object indexes diff --git a/xarray/structure/concat.py b/xarray/structure/concat.py index 69b05880e3d..774993efefa 100644 --- a/xarray/structure/concat.py +++ b/xarray/structure/concat.py @@ -114,7 +114,7 @@ def concat( unchanged. If dimension is provided as a Variable, DataArray or Index, its name is used as the dimension to concatenate along and the values are added as a coordinate. 
- data_vars : {"minimal", "different", "all", None} or list of Hashable, optional + data_vars : {"minimal", "different", "all", None} or list of Hashable, default: "all" These data variables will be concatenated together: * "minimal": Only data variables in which the dimension already appears are included. @@ -129,8 +129,8 @@ def concat( * list of dims: The listed data variables will be concatenated, in addition to the "minimal" data variables. - If objects are DataArrays, data_vars must be "all". - coords : {"minimal", "different", "all"} or list of Hashable, optional + If objects are DataArrays, data_vars must be "all" or None. + coords : {"minimal", "different", "all"} or list of Hashable, default: "different" These coordinate variables will be concatenated together: * "minimal": Only coordinates in which the dimension already appears are included. @@ -143,7 +143,8 @@ def concat( those corresponding to other dimensions. * list of Hashable: The listed coordinate variables will be concatenated, in addition to the "minimal" coordinates. - compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional + compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, \ + default: "equals" String indicating how to compare non-concatenated variables of the same name for potential conflicts. This is passed down to merge. @@ -164,7 +165,7 @@ def concat( Value to use for newly missing values. If a dict-like, maps variable names to fill values. Use a data array's name to refer to its values. 
- join : {"outer", "inner", "left", "right", "exact"}, optional + join : {"outer", "inner", "left", "right", "exact"}, default: "outer" String indicating how to combine differing indexes (excluding dim) in objects diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 5207ee3316e..bc98d72d50c 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -979,8 +979,12 @@ def test_concat_do_not_promote(self) -> None: Dataset({"y": ("t", [1])}, {"x": 1, "t": [0]}), Dataset({"y": ("t", [2])}, {"x": 2, "t": [0]}), ] - with pytest.raises(ValueError): - concat(objs, "t", coords="minimal") + with set_options(use_new_combine_kwarg_defaults=False): + with pytest.raises(ValueError): + concat(objs, "t", coords="minimal") + with set_options(use_new_combine_kwarg_defaults=True): + with pytest.raises(ValueError): + concat(objs, "t", compat="equals") def test_concat_dim_is_variable(self) -> None: objs = [Dataset({"x": 0}), Dataset({"x": 1})] @@ -1664,8 +1668,17 @@ def test_concat_datatree_along_existing_dim_defaults(self): FutureWarning, match="will change from data_vars='all' to data_vars=None" ): actual = concat([dt1, dt2], dim="x") + assert actual.identical(expected) + with set_options(use_new_combine_kwarg_defaults=True): + expected = DataTree.from_dict( + data={"/a": ("x", [1, 2]), "/b": 3}, coords={"/x": [0, 1]} + ) + actual = concat([dt1, dt2], dim="x") + + assert actual.identical(expected) + def test_concat_datatree_isomorphic_error(self): dt1 = DataTree.from_dict(data={"/data": ("x", [1]), "/a": None}) dt2 = DataTree.from_dict(data={"/data": ("x", [2]), "/b": None}) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 2d103994410..f00e945d0fe 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -471,7 +471,16 @@ def test_concat_loads_variables(self): assert isinstance(out["d"].data, dask.array.Array) assert isinstance(out["c"].data, dask.array.Array) - out = xr.concat([ds1, ds2, ds3], dim="n", 
data_vars=[], coords=[]) + with xr.set_options(use_new_combine_kwarg_defaults=True): + out = xr.concat([ds1, ds2, ds3], dim="n", data_vars=[], coords=[]) + # no extra kernel calls + assert kernel_call_count == 6 + assert isinstance(out["d"].data, dask.array.Array) + assert isinstance(out["c"].data, dask.array.Array) + + out = xr.concat( + [ds1, ds2, ds3], dim="n", data_vars=[], coords=[], compat="equals" + ) # variables are loaded once as we are validating that they're identical assert kernel_call_count == 12 assert isinstance(out["d"].data, np.ndarray) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 68db0babb04..de24b378539 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -528,7 +528,7 @@ def test_merge_coordinates(self): def test_merge_error(self): ds = xr.Dataset({"x": 0}) with pytest.raises(xr.MergeError): - xr.merge([ds, ds + 1]) + xr.merge([ds, ds + 1], compat="no_conflicts") def test_merge_alignment_error(self): ds = xr.Dataset(coords={"x": [1, 2]}) @@ -624,7 +624,7 @@ def test_merge(self): assert_identical(data, actual) with pytest.raises(ValueError, match="conflicting values for variable"): - ds1.merge(ds2.rename({"var3": "var1"})) + ds1.merge(ds2.rename({"var3": "var1"}), compat="no_conflicts") with pytest.raises(ValueError, match=r"should be coordinates or not"): data.reset_coords().merge(data) with pytest.raises(ValueError, match=r"should be coordinates or not"): @@ -948,7 +948,7 @@ def test_merge_error_includes_path(self) -> None: "Raised whilst mapping function over node(s) with path 'a'" ), ): - xr.merge([tree1, tree2], join="exact") + xr.merge([tree1, tree2], join="exact", compat="no_conflicts") def test_fill_value_errors(self) -> None: trees = [xr.DataTree(), xr.DataTree()] diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 56b3d9ad22b..42bf3649202 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -785,7 +785,7 @@ def 
test_combine_by_coords(variant, unit, error, dtype): if error is not None: with pytest.raises(error): - xr.combine_by_coords([ds, other]) + xr.combine_by_coords([ds, other], coords="different", compat="no_conflicts") return @@ -825,12 +825,6 @@ def test_combine_by_coords(variant, unit, error, dtype): "coords", ), ) -@pytest.mark.filterwarnings( - "ignore:.*the default value for join will change:FutureWarning" -) -@pytest.mark.filterwarnings( - "ignore:.*the default value for compat will change:FutureWarning" -) def test_combine_nested(variant, unit, error, dtype): original_unit = unit_registry.m @@ -890,7 +884,7 @@ def test_combine_nested(variant, unit, error, dtype): }, ) - func = function(xr.combine_nested, concat_dim=["x", "y"]) + func = function(xr.combine_nested, concat_dim=["x", "y"], join="outer") if error is not None: with pytest.raises(error): func([[ds1, ds2], [ds3, ds4]]) @@ -1071,12 +1065,6 @@ def test_concat_dataset(variant, unit, error, dtype): "coords", ), ) -@pytest.mark.filterwarnings( - "ignore:.*the default value for join will change:FutureWarning" -) -@pytest.mark.filterwarnings( - "ignore:.*the default value for compat will change:FutureWarning" -) def test_merge_dataarray(variant, unit, error, dtype): original_unit = unit_registry.m @@ -1128,9 +1116,10 @@ def test_merge_dataarray(variant, unit, error, dtype): dims=("y", "z"), ) + func = function(xr.merge, compat="no_conflicts", join="outer") if error is not None: with pytest.raises(error): - xr.merge([arr1, arr2, arr3]) + func([arr1, arr2, arr3]) return @@ -1146,13 +1135,13 @@ def test_merge_dataarray(variant, unit, error, dtype): convert_and_strip = lambda arr: strip_units(convert_units(arr, units)) expected = attach_units( - xr.merge( + func( [convert_and_strip(arr1), convert_and_strip(arr2), convert_and_strip(arr3)] ), units, ) - actual = xr.merge([arr1, arr2, arr3]) + actual = func([arr1, arr2, arr3]) assert_units_equal(expected, actual) assert_allclose(expected, actual) @@ -1181,12 
+1170,6 @@ def test_merge_dataarray(variant, unit, error, dtype): "coords", ), ) -@pytest.mark.filterwarnings( - "ignore:.*the default value for join will change:FutureWarning" -) -@pytest.mark.filterwarnings( - "ignore:.*the default value for compat will change:FutureWarning" -) def test_merge_dataset(variant, unit, error, dtype): original_unit = unit_registry.m @@ -1235,7 +1218,7 @@ def test_merge_dataset(variant, unit, error, dtype): }, ) - func = function(xr.merge) + func = function(xr.merge, compat="no_conflicts", join="outer") if error is not None: with pytest.raises(error): func([ds1, ds2, ds3]) @@ -5607,9 +5590,6 @@ def test_content_manipulation(self, func, variant, dtype): "coords", ), ) - @pytest.mark.filterwarnings( - "ignore:.*the default value for join will change:FutureWarning" - ) @pytest.mark.filterwarnings( "ignore:.*the default value for compat will change:FutureWarning" ) @@ -5651,13 +5631,15 @@ def test_merge(self, variant, unit, error, dtype): if error is not None: with pytest.raises(error): - left.merge(right) + left.merge(right, compat="no_conflicts", join="outer") return converted = convert_units(right, units) - expected = attach_units(strip_units(left).merge(strip_units(converted)), units) - actual = left.merge(right) + expected = attach_units( + strip_units(left).merge(strip_units(converted), join="outer"), units + ) + actual = left.merge(right, join="outer") assert_units_equal(expected, actual) assert_equal(expected, actual)