
Commit 84305e4

Merge branch 'main' into pydap4_scale
2 parents 5f2adfb + ef180b8

16 files changed: +411 −28 lines

doc/user-guide/hierarchical-data.rst

Lines changed: 2 additions & 0 deletions

@@ -453,6 +453,8 @@ The result is a new tree, containing only the nodes matching the condition.
 
 (Yes, under the hood :py:meth:`~xarray.DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !)
 
+If you want to filter out empty nodes you can use :py:meth:`~xarray.DataTree.prune`.
+
 .. _Tree Contents:
 
 Tree Contents

doc/whats-new.rst

Lines changed: 14 additions & 0 deletions

@@ -12,6 +12,9 @@ v2025.07.2 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- Added :py:meth:`DataTree.prune` method to remove empty nodes while preserving tree structure.
+  Useful for cleaning up DataTree after time-based filtering operations (:issue:`10590`, :pull:`10598`).
+  By `Alfonso Ladino <https://github.com/aladinor>`_.
 
 
 - :py:meth:`DataTree.to_netcdf` can now write to a file-like object, or return bytes if called without a filepath. (:issue:`10570`)
   By `Matthew Willson <https://github.com/mjwillson>`_.
@@ -24,6 +27,13 @@ New Features
 Breaking changes
 ~~~~~~~~~~~~~~~~
 
+- When writing to NetCDF files with groups, Xarray no longer redefines dimensions
+  that have the same size in parent groups (:issue:`10241`). This conforms with
+  `CF Conventions for group scope <https://cfconventions.org/cf-conventions/cf-conventions.html#_scope>`_
+  but may require adjustments for code that consumes NetCDF files produced by
+  Xarray.
+  By `Stephan Hoyer <https://github.com/shoyer>`_.
+
 
 Deprecations
 ~~~~~~~~~~~~
@@ -60,6 +70,10 @@ Bug fixes
 Documentation
 ~~~~~~~~~~~~~
 
+- Clarify lazy behaviour and eager loading for ``chunks=None`` in :py:func:`~xarray.open_dataset`, :py:func:`~xarray.open_dataarray`, :py:func:`~xarray.open_datatree`, :py:func:`~xarray.open_groups` and :py:func:`~xarray.open_zarr` (:issue:`10612`, :pull:`10627`).
+  By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
+
+
 
 Internal Changes
 ~~~~~~~~~~~~~~~~

xarray/backends/api.py

Lines changed: 16 additions & 8 deletions

@@ -578,8 +578,10 @@ def open_dataset(
 
     - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
       engine preferred chunks.
-    - ``chunks=None`` skips using dask, which is generally faster for
-      small arrays.
+    - ``chunks=None`` skips using dask. This uses xarray's internally private
+      :ref:`lazy indexing classes <internal design.lazy indexing>`,
+      but data is eagerly loaded into memory as numpy arrays when accessed.
+      This can be more efficient for smaller arrays or when large arrays are sliced before computation.
     - ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
     - ``chunks={}`` loads the data with dask using the engine's preferred chunk
       size, generally identical to the format's chunk size. If not available, a
@@ -819,8 +821,10 @@ def open_dataarray(
 
     - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
       engine preferred chunks.
-    - ``chunks=None`` skips using dask, which is generally faster for
-      small arrays.
+    - ``chunks=None`` skips using dask. This uses xarray's internally private
+      :ref:`lazy indexing classes <internal design.lazy indexing>`,
+      but data is eagerly loaded into memory as numpy arrays when accessed.
+      This can be more efficient for smaller arrays, though results may vary.
     - ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
     - ``chunks={}`` loads the data with dask using engine preferred chunks if
       exposed by the backend, otherwise with a single chunk for all arrays.
@@ -1044,8 +1048,10 @@ def open_datatree(
 
     - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
       engine preferred chunks.
-    - ``chunks=None`` skips using dask, which is generally faster for
-      small arrays.
+    - ``chunks=None`` skips using dask. This uses xarray's internally private
+      :ref:`lazy indexing classes <internal design.lazy indexing>`,
+      but data is eagerly loaded into memory as numpy arrays when accessed.
+      This can be more efficient for smaller arrays, though results may vary.
     - ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
     - ``chunks={}`` loads the data with dask using the engine's preferred chunk
       size, generally identical to the format's chunk size. If not available, a
@@ -1288,8 +1294,10 @@ def open_groups(
 
     - ``chunks="auto"`` will use dask ``auto`` chunking taking into account the
       engine preferred chunks.
-    - ``chunks=None`` skips using dask, which is generally faster for
-      small arrays.
+    - ``chunks=None`` skips using dask. This uses xarray's internally private
+      :ref:`lazy indexing classes <internal design.lazy indexing>`,
+      but data is eagerly loaded into memory as numpy arrays when accessed.
+      This can be more efficient for smaller arrays, though results may vary.
     - ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
     - ``chunks={}`` loads the data with dask using the engine's preferred chunk
       size, generally identical to the format's chunk size. If not available, a
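The behaviour the updated docstrings describe — indexing is deferred, but the data becomes an in-memory numpy array as soon as it is accessed — can be illustrated with a minimal stand-in for xarray's lazy indexing classes. The `LazyBackendArray` class and `fake_read` loader below are hypothetical sketches for illustration, not xarray's actual implementation:

```python
class LazyBackendArray:
    """Minimal analogue of a lazily-indexed backend array: opening it is
    free, and the backend is only read when a slice is accessed."""

    def __init__(self, load, shape):
        self._load = load  # callable that reads the underlying data
        self.shape = shape
        self.loads = 0  # count how often the backend is actually hit

    def __getitem__(self, key):
        # Eager materialisation happens here, only for the requested slice.
        self.loads += 1
        return self._load()[key]


def fake_read():
    """Stand-in for a backend read of a 10-element array."""
    return list(range(10))


arr = LazyBackendArray(fake_read, shape=(10,))
assert arr.loads == 0  # "opening the dataset" reads nothing
first_half = arr[slice(0, 5)]
assert first_half == [0, 1, 2, 3, 4]
assert arr.loads == 1  # data materialised only on access
```

This is why ``chunks=None`` can beat dask when a large array is sliced before computation: only the accessed region is ever read, without dask's task-graph overhead.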

xarray/backends/common.py

Lines changed: 20 additions & 2 deletions

@@ -256,6 +256,20 @@ def find_root_and_group(ds):
     return ds, group
 
 
+def collect_ancestor_dimensions(group) -> dict[str, int]:
+    """Returns dimensions defined in parent groups.
+
+    If dimensions are defined in multiple ancestors, use the size of the closest
+    ancestor.
+    """
+    dims = {}
+    while (group := group.parent) is not None:
+        for k, v in group.dimensions.items():
+            if k not in dims:
+                dims[k] = len(v)
+    return dims
+
+
 def datatree_from_dict_with_io_cleanup(groups_dict: Mapping[str, Dataset]) -> DataTree:
     """DataTree.from_dict with file clean-up."""
     try:
@@ -308,6 +322,9 @@ class AbstractDataStore:
     def get_dimensions(self):  # pragma: no cover
         raise NotImplementedError()
 
+    def get_parent_dimensions(self):  # pragma: no cover
+        return {}
+
     def get_attrs(self):  # pragma: no cover
         raise NotImplementedError()
 
@@ -563,21 +580,22 @@ def set_dimensions(self, variables, unlimited_dims=None):
         if unlimited_dims is None:
             unlimited_dims = set()
 
+        parent_dims = self.get_parent_dimensions()
         existing_dims = self.get_dimensions()
 
         dims = {}
         for v in unlimited_dims:  # put unlimited_dims first
             dims[v] = None
         for v in variables.values():
-            dims.update(dict(zip(v.dims, v.shape, strict=True)))
+            dims |= v.sizes
 
         for dim, length in dims.items():
             if dim in existing_dims and length != existing_dims[dim]:
                 raise ValueError(
                     "Unable to update size for existing dimension"
                     f"{dim!r} ({length} != {existing_dims[dim]})"
                 )
-            elif dim not in existing_dims:
+            elif dim not in existing_dims and length != parent_dims.get(dim):
                 is_unlimited = dim in unlimited_dims
                 self.set_dimension(dim, length, is_unlimited)

xarray/backends/h5netcdf_.py

Lines changed: 4 additions & 0 deletions

@@ -16,6 +16,7 @@
     WritableCFDataStore,
     _normalize_path,
     _open_remote_file,
+    collect_ancestor_dimensions,
     datatree_from_dict_with_io_cleanup,
     find_root_and_group,
 )
@@ -287,6 +288,9 @@ def get_attrs(self):
     def get_dimensions(self):
         return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
 
+    def get_parent_dimensions(self):
+        return FrozenDict(collect_ancestor_dimensions(self.ds))
+
     def get_encoding(self):
         return {
             "unlimited_dims": {

xarray/backends/netCDF4_.py

Lines changed: 4 additions & 0 deletions

@@ -16,6 +16,7 @@
     T_PathFileOrDataStore,
     WritableCFDataStore,
     _normalize_path,
+    collect_ancestor_dimensions,
     datatree_from_dict_with_io_cleanup,
     find_root_and_group,
     robust_getitem,
@@ -518,6 +519,9 @@ def get_attrs(self):
     def get_dimensions(self):
         return FrozenDict((k, len(v)) for k, v in self.ds.dimensions.items())
 
+    def get_parent_dimensions(self):
+        return FrozenDict(collect_ancestor_dimensions(self.ds))
+
     def get_encoding(self):
         return {
             "unlimited_dims": {

xarray/backends/zarr.py

Lines changed: 4 additions & 2 deletions

@@ -1370,8 +1370,10 @@ def open_zarr(
 
     - ``chunks='auto'`` will use dask ``auto`` chunking taking into account the
       engine preferred chunks.
-    - ``chunks=None`` skips using dask, which is generally faster for
-      small arrays.
+    - ``chunks=None`` skips using dask. This uses xarray's internally private
+      :ref:`lazy indexing classes <internal design.lazy indexing>`,
+      but data is eagerly loaded into memory as numpy arrays when accessed.
+      This can be more efficient for smaller arrays, though results may vary.
     - ``chunks=-1`` loads the data with dask using a single chunk for all arrays.
     - ``chunks={}`` loads the data with dask using engine preferred chunks if
       exposed by the backend, otherwise with a single chunk for all arrays.

xarray/core/coordinates.py

Lines changed: 1 addition & 1 deletion

@@ -177,7 +177,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index:
 
         # compute the cartesian product
         code_list += [
-            np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]).tolist()
+            np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i])
             for code in codes
         ]
         level_list += levels
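The one-line change above keeps the tiled codes as numpy arrays instead of converting them to Python lists, avoiding a needless copy. The repeat/tile pattern it relies on is the standard cartesian-product construction for MultiIndex codes: factor i's codes are each repeated by the product of the later level sizes and the result is tiled by the product of the earlier level sizes. A pure-Python sketch of the same pattern (`cartesian_codes` is a hypothetical helper for illustration, not xarray's function):

```python
def cartesian_codes(levels):
    """Cartesian-product integer codes for a list of levels.

    For factor i: repeat each code by prod(sizes after i), then tile the
    whole sequence prod(sizes before i) times.
    """
    sizes = [len(lv) for lv in levels]
    out = []
    for i, lv in enumerate(levels):
        repeat = 1
        for s in sizes[i + 1:]:
            repeat *= s
        tile = 1
        for s in sizes[:i]:
            tile *= s
        # pure-Python equivalent of np.tile(np.repeat(codes, repeat), tile)
        codes = [c for c in range(len(lv)) for _ in range(repeat)] * tile
        out.append(codes)
    return out


codes = cartesian_codes([["a", "b"], ["x", "y", "z"]])
assert codes[0] == [0, 0, 0, 1, 1, 1]  # "a" paired with all of x, y, z, then "b"
assert codes[1] == [0, 1, 2, 0, 1, 2]
```

Every column position then names one element of the product `("a","x"), ("a","y"), …, ("b","z")`, which is exactly what `pd.MultiIndex` expects as codes.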

xarray/core/datatree.py

Lines changed: 67 additions & 0 deletions

@@ -1450,6 +1450,73 @@ def filter_like(self, other: DataTree) -> DataTree:
         other_keys = {key for key, _ in other.subtree_with_keys}
         return self.filter(lambda node: node.relative_to(self) in other_keys)
 
+    def prune(self, drop_size_zero_vars: bool = False) -> DataTree:
+        """
+        Remove empty nodes from the tree.
+
+        Returns a new tree containing only nodes that contain data variables with actual data.
+        Intermediate nodes are kept if they are required to support non-empty children.
+
+        Parameters
+        ----------
+        drop_size_zero_vars : bool, default False
+            If True, also considers variables with zero size as empty.
+            If False, keeps nodes with data variables even if they have zero size.
+
+        Returns
+        -------
+        DataTree
+            A new tree with empty nodes removed.
+
+        See Also
+        --------
+        filter
+
+        Examples
+        --------
+        >>> dt = xr.DataTree.from_dict(
+        ...     {
+        ...         "/a": xr.Dataset({"foo": ("x", [1, 2])}),
+        ...         "/b": xr.Dataset({"bar": ("x", [])}),
+        ...         "/c": xr.Dataset(),
+        ...     }
+        ... )
+        >>> dt.prune()  # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE
+        <xarray.DataTree>
+        Group: /
+        ├── Group: /a
+        │       Dimensions:  (x: 2)
+        │       Dimensions without coordinates: x
+        │       Data variables:
+        │           foo      (x) int64 16B 1 2
+        └── Group: /b
+                Dimensions:  (x: 0)
+                Dimensions without coordinates: x
+                Data variables:
+                    bar      (x) float64 0B...
+
+        The ``drop_size_zero_vars`` parameter controls whether variables
+        with zero size are considered empty:
+
+        >>> dt.prune(drop_size_zero_vars=True)
+        <xarray.DataTree>
+        Group: /
+        └── Group: /a
+                Dimensions:  (x: 2)
+                Dimensions without coordinates: x
+                Data variables:
+                    foo      (x) int64 16B 1 2
+        """
+        non_empty_cond: Callable[[DataTree], bool]
+        if drop_size_zero_vars:
+            non_empty_cond = lambda node: len(node.data_vars) > 0 and any(
+                var.size > 0 for var in node.data_vars.values()
+            )
+        else:
+            non_empty_cond = lambda node: len(node.data_vars) > 0
+
+        return self.filter(non_empty_cond)
+
     def match(self, pattern: str) -> DataTree:
         """
         Return nodes with paths matching pattern.
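Stripped of the DataTree machinery, the prune semantics reduce to a predicate over nodes: a node survives if it has data variables, and with ``drop_size_zero_vars=True`` at least one of them must be non-empty. The dict-based sketch below (hypothetical `prune` helper over a flat path-to-variables mapping, for illustration; it elides the real method's retention of intermediate nodes that support non-empty children) mirrors the docstring example:

```python
def prune(tree, drop_size_zero_vars=False):
    """Keep paths whose node has data variables, optionally requiring at
    least one variable with nonzero size.

    `tree` maps path -> dict of variable name -> list of values.
    """

    def non_empty(ds):
        if not ds:
            return False  # no data variables at all: always pruned
        if drop_size_zero_vars:
            return any(len(v) > 0 for v in ds.values())
        return True  # has variables, zero-size ones still count

    return {path: ds for path, ds in tree.items() if non_empty(ds)}


dt = {
    "/a": {"foo": [1, 2]},  # real data
    "/b": {"bar": []},      # variable present but size zero
    "/c": {},               # no variables
}
assert prune(dt) == {"/a": {"foo": [1, 2]}, "/b": {"bar": []}}
assert prune(dt, drop_size_zero_vars=True) == {"/a": {"foo": [1, 2]}}
```

The two assertions correspond to the two doctest outputs above: the default keeps `/b` because `bar` exists, while `drop_size_zero_vars=True` also discards it.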
