Skip to content

Add DataTree.prune() method … #10598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 41 commits into from
Aug 13, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
16b92e3
Add DataTree.is_data_empty property and .prune() method …
aladinor Aug 1, 2025
0b5ee3b
documenting changes in whats-new.rst file
aladinor Aug 1, 2025
d5d5621
removing blank lines
aladinor Aug 1, 2025
043a037
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 2, 2025
ba73805
removing new property instead using data_vars and fixing correspondin…
aladinor Aug 5, 2025
6a9664b
removing .is_empty_data entry
aladinor Aug 5, 2025
239c53f
Merge branch 'feature/datatree-prune-method' of https://github.com/al…
aladinor Aug 5, 2025
d141fbe
updating github url
aladinor Aug 5, 2025
d4b3970
fixing test accordingly
aladinor Aug 5, 2025
4431834
fixing doctest
aladinor Aug 5, 2025
7fc2e8b
fixing doctest
aladinor Aug 5, 2025
6e1956f
fixing doctest
aladinor Aug 5, 2025
6f20286
replacing doctest
aladinor Aug 5, 2025
6da389d
replacing doctest
aladinor Aug 5, 2025
d7f85b8
removing empty line
aladinor Aug 5, 2025
ecf186d
removing empty line
aladinor Aug 5, 2025
03c78fa
Update xarray/core/datatree.py
aladinor Aug 5, 2025
6161755
Update xarray/core/datatree.py
aladinor Aug 5, 2025
a7cd8d5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 5, 2025
338c76a
improving doctests
aladinor Aug 5, 2025
aea2e67
fixing typo
aladinor Aug 5, 2025
730c7aa
refactoring test accodingly to Tom's suggestion
aladinor Aug 5, 2025
7722cfe
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 5, 2025
7aa25c9
fixing test_prune_after_filtering
aladinor Aug 6, 2025
1c78329
Merge branch 'feature/datatree-prune-method' of https://github.com/al…
aladinor Aug 6, 2025
d99ee78
refactoring test to use assert_identical
aladinor Aug 6, 2025
82ea57e
refactoring test to use assert)_equal
aladinor Aug 6, 2025
7cd493d
adding reference to .prune method in Subsetting Tree Nodes
aladinor Aug 6, 2025
8223810
adding # doctest: +NORMALIZE_WHITESPACE to avoid error with trailing …
aladinor Aug 6, 2025
bbcaf91
Fix doctest trailing space issue in prune method
aladinor Aug 6, 2025
219a4e6
trial 2 fixing trailing space
aladinor Aug 6, 2025
27c964a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 6, 2025
53789c7
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 10, 2025
55e12a3
normalizing whitespace for doctest
aladinor Aug 10, 2025
25db677
Merge branch 'feature/datatree-prune-method' of https://github.com/al…
aladinor Aug 10, 2025
678f45b
normalizing whitespace and adding ellipsis for doctest
aladinor Aug 10, 2025
1e95e56
normalizing whitespace and adding ellipsis for doctest
aladinor Aug 10, 2025
e4ae620
normalizing whitespace and adding ellipsis for doctest
aladinor Aug 10, 2025
5502194
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 11, 2025
fb2a004
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 13, 2025
185cc60
Merge branch 'main' into feature/datatree-prune-method
aladinor Aug 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ v2025.07.2 (unreleased)

New Features
~~~~~~~~~~~~
- Added :py:attr:`DataTree.is_data_empty` property to check if a node contains data variables with actual data (:issue:`10590`, :pull:`10598`).
By `Alfonso Ladino <https://github.com/aladinor>`_.
- Added :py:meth:`DataTree.prune` method to remove empty nodes while preserving tree structure.
Useful for cleaning up DataTree after time-based filtering operations (:issue:`10590`, :pull:`10598`).
By `Alfonso Ladino <https://github.com/aladinor>`_.


Breaking changes
Expand Down
43 changes: 43 additions & 0 deletions xarray/core/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,13 @@ def is_empty(self) -> bool:
"""False if node contains any data or attrs. Does not look at children."""
return not (self.has_data or self.has_attrs)

@property
def is_data_empty(self) -> bool:
    """True unless at least one data variable on this node has ``size > 0``.

    Only this node's own data variables are inspected; children are ignored.
    A node with no data variables at all is reported as data-empty.
    """
    # Stop at the first variable that actually holds elements.
    for variable in self._data_variables.values():
        if variable.size > 0:
            return False
    return True

@property
def is_hollow(self) -> bool:
"""True if only leaf nodes contain data."""
Expand Down Expand Up @@ -1448,6 +1455,42 @@ def filter_like(self, other: DataTree) -> DataTree:
other_keys = {key for key, _ in other.subtree_with_keys}
return self.filter(lambda node: node.relative_to(self) in other_keys)

def prune(self) -> DataTree:
    """
    Drop data-empty nodes from the tree.

    Delegates to :py:meth:`filter`, keeping every node whose
    ``is_data_empty`` property is False. Intermediate nodes are kept if
    they are required to support non-empty children.

    Returns
    -------
    DataTree
        A new tree with empty nodes removed.

    See Also
    --------
    filter
    is_data_empty

    Examples
    --------
    >>> dt = xr.DataTree.from_dict(
    ...     {
    ...         "/a": xr.Dataset({"foo": ("x", [1, 2])}),
    ...         "/b": xr.Dataset(),  # empty dataset
    ...     }
    ... )
    >>> dt.prune()
    <xarray.DataTree>
    Group: /
    └── Group: /a
            Dimensions:  (x: 2)
            Dimensions without coordinates: x
            Data variables:
                foo      (x) int64 16B 1 2
    """

    def _holds_data(node) -> bool:
        # A node survives pruning only when it is not data-empty.
        return not node.is_data_empty

    return self.filter(_holds_data)

def match(self, pattern: str) -> DataTree:
"""
Return nodes with paths matching pattern.
Expand Down
69 changes: 69 additions & 0 deletions xarray/tests/test_datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -1942,6 +1942,75 @@ def test_filter(self) -> None:
)
assert_identical(actual, expected)

def test_is_data_empty(self) -> None:
    """Exercise ``is_data_empty`` on populated, coords-only, bare and zero-size nodes."""
    # A non-empty data variable means the node is not data-empty.
    populated = DataTree(dataset=xr.Dataset({"foo": ("x", [1, 2])}))
    assert populated.is_data_empty is False

    # Coordinates on their own do not count as data.
    coords_only = DataTree(dataset=xr.Dataset(coords={"x": [1, 2]}))
    assert coords_only.is_data_empty is True

    # A node built without any dataset.
    bare = DataTree()
    assert bare.is_data_empty is True

    # A data variable is present but has zero elements.
    zero_size = DataTree(dataset=xr.Dataset({"var": ("time", [])}))
    assert zero_size.is_data_empty is True

def test_prune_basic(self) -> None:
    """Pruning keeps the populated child and drops the empty sibling."""
    original = DataTree.from_dict(
        {"/a": xr.Dataset({"foo": ("x", [1, 2])}), "/b": xr.Dataset()}
    )

    result = original.prune()
    kept = result.children

    assert "a" in kept
    assert "b" not in kept
    # The surviving node must carry exactly the data it had before pruning.
    before = original.children["a"].to_dataset()
    after = kept["a"].to_dataset()
    assert_identical(after, before)

def test_prune_with_intermediate_nodes(self) -> None:
    """An empty parent survives when one of its descendants holds data."""
    layout = {
        "/": xr.Dataset(),
        "/group1": xr.Dataset(),
        "/group1/subA": xr.Dataset({"temp": ("x", [1, 2])}),
        "/group1/subB": xr.Dataset(),
        "/group2": xr.Dataset(),
    }

    result = DataTree.from_dict(layout).prune()
    top_level = result.children

    # group1 has no data itself but is needed to reach subA.
    assert "group1" in top_level
    under_group1 = top_level["group1"].children
    assert "subA" in under_group1
    assert "subB" not in under_group1
    # group2 is empty and has no descendants with data.
    assert "group2" not in top_level

def test_prune_after_filtering(self) -> None:
    """A node emptied by a time selection is removed by a subsequent prune."""
    import pandas as pd

    early_times = pd.date_range("2023-01-01", periods=5, freq="D")
    early = xr.Dataset(
        {"foo": ("time", [1, 2, 3, 4, 5])}, coords={"time": early_times}
    )
    late_times = pd.date_range("2023-01-04", periods=5, freq="D")
    late = xr.Dataset(
        {"var": ("time", [1, 2, 3, 4, 5])}, coords={"time": late_times}
    )

    window = slice("2023-01-01", "2023-01-03")
    selected = DataTree.from_dict({"a": early, "b": late}).sel(time=window)

    # "b" starts after the window ends, so it is still present but holds no data.
    assert "b" in selected.children
    assert selected.children["b"].is_data_empty is True

    remaining = selected.prune().children
    assert "a" in remaining
    assert "b" not in remaining


class TestIndexing:
def test_isel_siblings(self) -> None:
Expand Down
Loading