Skip to content

Commit ef180b8

Browse files
aladinorTomNicholaspre-commit-ci[bot]
authored
Add DataTree.prune() method … (#10598)
* Add DataTree.is_data_empty property and .prune() method - Add is_data_empty property to check if node contains data variables with actual data - Add prune() method to remove empty nodes while preserving tree structure - Include comprehensive tests covering basic pruning, intermediate nodes, and filtering scenarios - Useful for cleaning up DataTree after time-based filtering operations * documenting changes in whats-new.rst file * removing blank lines * removing new property, instead using data_vars and fixing corresponding test * removing .is_empty_data entry * updating github url * fixing test accordingly * fixing doctest * fixing doctest * fixing doctest * replacing doctest * replacing doctest * removing empty line * removing empty line * Update xarray/core/datatree.py Co-authored-by: Tom Nicholas <[email protected]> * Update xarray/core/datatree.py Co-authored-by: Tom Nicholas <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * improving doctests * fixing typo * refactoring test according to Tom's suggestion * fixing test_prune_after_filtering * refactoring test to use assert_identical * refactoring test to use assert_equal * adding reference to .prune method in Subsetting Tree Nodes * adding # doctest: +NORMALIZE_WHITESPACE to avoid error with trailing space * Fix doctest trailing space issue in prune method * trial 2 fixing trailing space * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * normalizing whitespace for doctest * normalizing whitespace and adding ellipsis for doctest * normalizing whitespace and adding ellipsis for doctest * normalizing whitespace and adding ellipsis for doctest --------- Co-authored-by: Tom Nicholas <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 6762c2e commit ef180b8

File tree

4 files changed

+151
-0
lines changed

4 files changed

+151
-0
lines changed

doc/user-guide/hierarchical-data.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,8 @@ The result is a new tree, containing only the nodes matching the condition.
453453

454454
(Yes, under the hood :py:meth:`~xarray.DataTree.filter` is just syntactic sugar for the pattern we showed you in :ref:`iterating over trees` !)
455455

456+
If you want to filter out empty nodes you can use :py:meth:`~xarray.DataTree.prune`.
457+
456458
.. _Tree Contents:
457459

458460
Tree Contents

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ v2025.07.2 (unreleased)
1212

1313
New Features
1414
~~~~~~~~~~~~
15+
- Added :py:meth:`DataTree.prune` method to remove empty nodes while preserving tree structure.
16+
Useful for cleaning up DataTree after time-based filtering operations (:issue:`10590`, :pull:`10598`).
17+
By `Alfonso Ladino <https://github.com/aladinor>`_.
1518

1619
- :py:meth:`DataTree.to_netcdf` can now write to a file-like object, or return bytes if called without a filepath. (:issue:`10570`)
1720
By `Matthew Willson <https://github.com/mjwillson>`_.

xarray/core/datatree.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1450,6 +1450,73 @@ def filter_like(self, other: DataTree) -> DataTree:
14501450
other_keys = {key for key, _ in other.subtree_with_keys}
14511451
return self.filter(lambda node: node.relative_to(self) in other_keys)
14521452

1453+
def prune(self, drop_size_zero_vars: bool = False) -> DataTree:
    """
    Drop nodes that hold no data variables.

    The tree is passed through :py:meth:`filter` with a predicate that
    decides, node by node, whether the node counts as non-empty. Nodes
    failing the predicate are still kept when they are needed as
    intermediate parents of a non-empty descendant.

    Parameters
    ----------
    drop_size_zero_vars : bool, default False
        If False, a node is non-empty as soon as it has at least one data
        variable, whatever that variable's size.
        If True, a node is non-empty only if at least one of its data
        variables has a size greater than zero.

    Returns
    -------
    DataTree
        The tree produced by filtering out the empty nodes.

    See Also
    --------
    filter

    Examples
    --------
    >>> dt = xr.DataTree.from_dict(
    ...     {
    ...         "/a": xr.Dataset({"foo": ("x", [1, 2])}),
    ...         "/b": xr.Dataset({"bar": ("x", [])}),
    ...         "/c": xr.Dataset(),
    ...     }
    ... )
    >>> dt.prune()  # doctest: +ELLIPSIS,+NORMALIZE_WHITESPACE
    <xarray.DataTree>
    Group: /
    ├── Group: /a
    │       Dimensions:  (x: 2)
    │       Dimensions without coordinates: x
    │       Data variables:
    │           foo      (x) int64 16B 1 2
    └── Group: /b
            Dimensions:  (x: 0)
            Dimensions without coordinates: x
            Data variables:
                bar      (x) float64 0B...

    With ``drop_size_zero_vars=True`` the node ``/b``, whose only
    variable has size zero, is removed as well:

    >>> dt.prune(drop_size_zero_vars=True)  # doctest: +NORMALIZE_WHITESPACE
    <xarray.DataTree>
    Group: /
    └── Group: /a
            Dimensions:  (x: 2)
            Dimensions without coordinates: x
            Data variables:
                foo      (x) int64 16B 1 2
    """

    def has_data_vars(node: DataTree) -> bool:
        # Lenient test: any data variable at all makes the node non-empty.
        return len(node.data_vars) > 0

    def has_nonzero_size_var(node: DataTree) -> bool:
        # Strict test: at least one data variable must actually hold values.
        return has_data_vars(node) and any(
            var.size > 0 for var in node.data_vars.values()
        )

    keep_node = has_nonzero_size_var if drop_size_zero_vars else has_data_vars
    return self.filter(keep_node)
1519+
14531520
def match(self, pattern: str) -> DataTree:
14541521
"""
14551522
Return nodes with paths matching pattern.

xarray/tests/test_datatree.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,6 +1942,85 @@ def test_filter(self) -> None:
19421942
)
19431943
assert_identical(actual, expected)
19441944

1945+
def test_prune_basic(self) -> None:
    """A node without data variables is dropped; a populated sibling stays."""
    populated = xr.Dataset({"foo": ("x", [1, 2])})
    tree = DataTree.from_dict({"/a": populated, "/b": xr.Dataset()})

    pruned = tree.prune()

    # "/a" survives untouched, "/b" (no variables) is gone.
    assert "a" in pruned.children
    assert "b" not in pruned.children
    kept_ds = pruned.children["a"].to_dataset()
    original_ds = tree.children["a"].to_dataset()
    assert_identical(kept_ds, original_ds)
1957+
1958+
def test_prune_with_zero_size_vars(self) -> None:
    """Zero-size variables only count as empty with drop_size_zero_vars=True."""

    def with_values() -> xr.Dataset:
        return xr.Dataset({"foo": ("x", [1, 2])})

    def zero_sized() -> xr.Dataset:
        return xr.Dataset({"empty": ("dim", [])})

    tree = DataTree.from_dict(
        {"/a": with_values(), "/b": zero_sized(), "/c": xr.Dataset()}
    )

    # Default: "/b" has a (zero-size) variable and is kept; "/c" has none.
    pruned_default = tree.prune()
    expected_default = DataTree.from_dict(
        {"/a": with_values(), "/b": zero_sized()}
    )
    assert_identical(pruned_default, expected_default)

    # Strict: a node whose variables are all zero-size is dropped too.
    pruned_strict = tree.prune(drop_size_zero_vars=True)
    expected_strict = DataTree.from_dict({"/a": with_values()})
    assert_identical(pruned_strict, expected_strict)
1983+
1984+
def test_prune_with_intermediate_nodes(self) -> None:
    """Empty parents are retained when a descendant still holds variables."""
    temp_ds = xr.Dataset({"temp": ("x", [1, 2])})
    zero_size_ds = xr.Dataset({"empty": ("dim", [])})
    nodes = {
        "/": xr.Dataset(),
        "/group1": xr.Dataset(),
        "/group1/subA": temp_ds,
        "/group1/subB": xr.Dataset(),
        "/group2": zero_size_ds,
    }
    tree = DataTree.from_dict(nodes)

    pruned = tree.prune()

    # Only the two nodes that carry a data variable are listed here;
    # "/group1" is expected back as the parent needed to reach "subA".
    expected_tree = DataTree.from_dict(
        {
            "/group1/subA": xr.Dataset({"temp": ("x", [1, 2])}),
            "/group2": xr.Dataset({"empty": ("dim", [])}),
        }
    )
    assert_identical(pruned, expected_tree)
2002+
2003+
def test_prune_after_filtering(self) -> None:
    """Strict pruning after a time selection drops the node left without data."""
    from pandas import date_range

    # "a" covers 2023-01-01..05, "b" covers 2023-01-04..08.
    times_a = date_range("2023-01-01", periods=5, freq="D")
    times_b = date_range("2023-01-04", periods=5, freq="D")
    ds1 = xr.Dataset({"foo": ("time", [1, 2, 3, 4, 5])}, coords={"time": times_a})
    ds2 = xr.Dataset({"var": ("time", [1, 2, 3, 4, 5])}, coords={"time": times_b})
    tree = DataTree.from_dict({"a": ds1, "b": ds2})

    # The window ends before "b" begins, so only "a" overlaps it.
    window = slice("2023-01-01", "2023-01-03")
    filtered = tree.sel(time=window)
    pruned = filtered.prune(drop_size_zero_vars=True)

    expected_tree = DataTree.from_dict({"a": ds1.sel(time=window)})
    assert_identical(pruned, expected_tree)
2023+
19452024

19462025
class TestIndexing:
19472026
def test_isel_siblings(self) -> None:

0 commit comments

Comments
 (0)