Skip to content

Commit 9e20aab

Browse files
committed
Remove drop_variables and truthy loadable_variables support
1 parent 9b91ad3 commit 9e20aab

File tree

4 files changed

+39
-60
lines changed

4 files changed

+39
-60
lines changed

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ def netcdf4_file_with_data_in_sibling_groups(tmp_path: Path) -> str:
220220
filepath = tmp_path / "test.nc"
221221
ds1 = xr.DataArray([1, 2, 3], name="foo").to_dataset()
222222
ds1.to_netcdf(filepath, group="subgroup1")
223-
ds2 = xr.DataArray([4, 5], name="bar").to_dataset()
223+
ds2 = xr.DataArray([4, 5], coords={"x": [0, 1]}, dims="x", name="bar").to_dataset()
224224
ds2.to_netcdf(filepath, group="subgroup2", mode="a")
225225
return str(filepath)
226226

virtualizarr/manifests/store.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,6 @@ def to_virtual_datatree(
336336
self,
337337
group="",
338338
*,
339-
drop_variables: Iterable[str] | None = None,
340339
loadable_variables: Iterable[str] | None = None,
341340
decode_times: bool | None = None,
342341
) -> "xr.DataTree":
@@ -350,8 +349,6 @@ def to_virtual_datatree(
350349
Parameters
351350
----------
352351
group : Group to convert to a virtual DataTree
353-
drop_variables
354-
Variables in the data source to drop before returning.
355352
loadable_variables
356353
Variables in the data source to load as Dask/NumPy arrays instead of as virtual arrays.
357354
decode_times
@@ -369,7 +366,6 @@ def to_virtual_datatree(
369366
group=group,
370367
loadable_variables=loadable_variables,
371368
decode_times=decode_times,
372-
drop_variables=drop_variables,
373369
)
374370

375371

virtualizarr/tests/test_xarray.py

Lines changed: 30 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -689,47 +689,60 @@ def test_open_virtual_datatree(
689689
assert list(vdt["/subgroup1"].variables) == ["foo"]
690690
assert isinstance(vdt["/subgroup1"]["foo"].data, ManifestArray)
691691
assert vdt["/subgroup1"]["foo"].shape == (3,)
692-
assert list(vdt["/subgroup2"].variables) == ["bar"]
692+
assert list(vdt["/subgroup2"].variables) == ["bar", "x"]
693693
assert isinstance(vdt["/subgroup2"]["bar"].data, ManifestArray)
694+
assert isinstance(vdt["/subgroup2"]["x"].data, np.ndarray)
694695
assert vdt["/subgroup2"]["bar"].shape == (2,)
696+
assert vdt["/subgroup2"]["x"].shape == (2,)
695697

696-
def test_open_virtual_datatree_all_vars_loaded(
698+
def test_open_virtual_datatree_no_vars_loaded(
697699
self, netcdf4_file_with_data_in_sibling_groups, local_registry
698700
):
699701
with (
700702
open_virtual_datatree(
701703
url=netcdf4_file_with_data_in_sibling_groups,
702704
registry=local_registry,
703705
parser=HDFParser(),
704-
loadable_variables=["foo", "bar"],
706+
loadable_variables=[],
705707
) as vdt,
706708
open_datatree(
707709
netcdf4_file_with_data_in_sibling_groups, engine="h5netcdf"
708710
) as dt,
709711
):
710-
xr.testing.assert_allclose(vdt, dt)
712+
vdt.isomorphic(dt)
713+
assert list(vdt["/subgroup1"].variables) == ["foo"]
714+
assert isinstance(vdt["/subgroup1"]["foo"].data, ManifestArray)
715+
assert vdt["/subgroup1"]["foo"].shape == (3,)
716+
assert list(vdt["/subgroup2"].variables) == ["bar", "x"]
717+
assert isinstance(vdt["/subgroup2"]["bar"].data, ManifestArray)
718+
assert isinstance(vdt["/subgroup2"]["x"].data, ManifestArray)
719+
assert vdt["/subgroup2"]["bar"].shape == (2,)
720+
assert vdt["/subgroup2"]["x"].shape == (2,)
721+
722+
def test_open_virtual_datatree_all_vars_loaded(
723+
self, netcdf4_file_with_data_in_sibling_groups, local_registry
724+
):
725+
with pytest.raises(
726+
NotImplementedError,
727+
match=r"Only `loadable_variables=\[\]` or `loadable_variables=None` are supported, got loadable_variables",
728+
):
729+
open_virtual_datatree(
730+
url=netcdf4_file_with_data_in_sibling_groups,
731+
registry=local_registry,
732+
parser=HDFParser(),
733+
loadable_variables=["foo", "bar"],
734+
)
711735

712736
def test_open_virtual_datatree_drop_vars(
713737
self, netcdf4_file_with_data_in_sibling_groups, local_registry
714738
):
715-
with (
739+
with pytest.raises(TypeError, match="unexpected keyword argument"):
716740
open_virtual_datatree(
717741
url=netcdf4_file_with_data_in_sibling_groups,
718742
registry=local_registry,
719743
parser=HDFParser(),
720744
drop_variables=["foo"],
721-
) as vdt,
722-
open_datatree(
723-
netcdf4_file_with_data_in_sibling_groups,
724-
engine="h5netcdf",
725-
drop_variables=["foo"],
726-
) as dt,
727-
):
728-
vdt.isomorphic(dt)
729-
assert list(vdt["/subgroup1"].variables) == []
730-
assert list(vdt["/subgroup2"].variables) == ["bar"]
731-
assert isinstance(vdt["/subgroup2"]["bar"].data, ManifestArray)
732-
assert vdt["/subgroup2"]["bar"].shape == (2,)
745+
)
733746

734747
@pytest.mark.parametrize("group", ["", None])
735748
def test_open_root_group(

virtualizarr/xarray.py

Lines changed: 8 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,6 @@ def open_virtual_datatree(
4040
registry: ObjectStoreRegistry,
4141
parser: Parser,
4242
*,
43-
drop_variables: Iterable[str] | None = None,
4443
loadable_variables: Iterable[str] | None = None,
4544
decode_times: bool | None = None,
4645
) -> xr.DataTree:
@@ -72,33 +71,13 @@ def open_virtual_datatree(
7271
- [virtualizarr.parsers.ZarrParser][] for virtualizing Zarr stores.
7372
- [virtual_tiff.VirtualTIFF][] for virtualizing TIFFs.
7473
75-
drop_variables
76-
Variables in the data source to drop before returning.
77-
78-
Variable names are matched by their simple name (not a path). Matching is applied uniformly
79-
across all groups in the DataTree. For example, ``drop_variables=["time"]`` will drop any variable
80-
named "time" from every group that contains it.
81-
82-
Path-like strings (e.g., ``"/group1/time"``) are ignored unless they match a specific variable name.
83-
To drop variables from specific groups only, open the DataTree first and then use xarray's
84-
``.drop_vars()`` method on the desired nodes.
85-
86-
Unlike xarray's opening functions, errors are not raised when a specified variable is not found.
87-
8874
loadable_variables
89-
Variables in the data source to load as Dask/NumPy arrays instead of as virtual arrays. If
90-
``None`` (the default), dimension coordinate variables (1D variables whose name matches
75+
If ``None`` (the default), dimension coordinate variables (1D variables whose name matches
9176
their dimension) will be loaded automatically to enable xarray indexing.
9277
93-
Variable names are matched by their simple name (not a path). Matching is applied uniformly
94-
across all groups in the DataTree. For example, ``loadable_variables=["time", "lat", "lon"]``
95-
will load any variable with those names from every group that contains them.
96-
97-
Path-like strings (e.g., ``"/group1/time"``) are ignored unless they match a specific variable name.
98-
To load variables from specific groups only, you would need to open groups separately using
99-
``open_virtual_dataset`` with the parser's ``group`` parameter.
78+
If an empty iterable, no variables will be loaded.
10079
101-
Unlike xarray's opening functions, errors are not raised when a specified variable is not found.
80+
Other options are not yet supported.
10281
10382
decode_times
10483
Bool that is passed into [xarray.open_dataset][]. Allows time to be decoded into a datetime object.
@@ -162,17 +141,6 @@ def open_virtual_datatree(
162141
)
163142
```
164143
165-
Drop the "lon" variable from all groups:
166-
167-
```python
168-
vdt = open_virtual_datatree(
169-
url=url,
170-
registry=registry,
171-
parser=parser,
172-
drop_variables=["lon"],
173-
)
174-
```
175-
176144
Drop variables from a specific group after opening:
177145
178146
```python
@@ -187,6 +155,10 @@ def open_virtual_datatree(
187155
"""
188156
filepath = validate_and_normalize_path_to_uri(url, fs_root=Path.cwd().as_uri())
189157

158+
if loadable_variables:
159+
raise NotImplementedError(
160+
f"Only `loadable_variables=[]` or `loadable_variables=None` are supported, got loadable_variables={loadable_variables}"
161+
)
190162
manifest_store = parser(
191163
url=filepath,
192164
registry=registry,
@@ -195,7 +167,6 @@ def open_virtual_datatree(
195167
return manifest_store.to_virtual_datatree(
196168
loadable_variables=loadable_variables,
197169
decode_times=decode_times,
198-
drop_variables=drop_variables,
199170
)
200171

201172

@@ -522,7 +493,6 @@ def construct_virtual_datatree(
522493
manifest_store: ManifestStore,
523494
group: str = "",
524495
*,
525-
drop_variables: Iterable[str] | None = None,
526496
loadable_variables: Iterable[str] | None = None,
527497
decode_times: bool | None = None,
528498
) -> xr.DataTree:
@@ -548,7 +518,7 @@ def construct_virtual_datatree(
548518
virtual_node.to_dataset(),
549519
fully_loadable_datatree[name].to_dataset(),
550520
loadable_variables,
551-
).drop_vars(list(drop_variables or ()), errors="ignore")
521+
)
552522
for name, virtual_node in node.to_virtual_datatree().subtree_with_keys
553523
}
554524

0 commit comments

Comments
 (0)