Skip to content

Commit caa24a5

Browse files
TomNicholaspre-commit-ci[bot]maxrjones
authored
Remove indexes kwarg from public API (#721)
* remove indexes from API * adapt tests * update migration guide * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Apply suggestions from code review Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com> * Explanation for removing `cftime_variables` Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com> * remove indexes from construct_virtual_dataset too --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Max Jones <14077947+maxrjones@users.noreply.github.com>
1 parent ae98724 commit caa24a5

File tree

5 files changed

+16
-26
lines changed

5 files changed

+16
-26
lines changed

docs/migration_guide.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
VirtualiZarr V2 includes breaking changes and other conceptual differences relative to V1. The goal of this guide
44
is to provide some context around the core changes and demonstrate the updated usage.
55

6-
## Breaking changes
6+
## Breaking API changes in `open_virtual_dataset`
77

8-
### Open_virtual_dataset
8+
### Filetype identification, parsers, and stores
99

10-
In V1 there was a lot of auto-magic guesswork of filetypes and urls happening under the hood.
10+
In V1 there was a lot of auto-magic guesswork of filetypes, URLs, and types of remote storage happening under the hood.
1111
While this made it easy to get started, it could lead to a lot of foot-guns and unexpected behavior.
1212

1313
For example, the following V1-style usage would guess that your data is in a NetCDF file format and that your data
@@ -74,6 +74,17 @@ for reading the original data, but some parsers may accept an empty [ObjectStore
7474
)
7575
```
7676

77+
### Removal of other kwargs
78+
79+
We have removed some keyword arguments to `open_virtual_dataset` that were deprecated, saw little use, or are now redundant. Specifically:
80+
81+
- `indexes` - there is little need to control this separately from `loadable_variables`,
82+
- `cftime_variables` - this argument is deprecated upstream in favor of `decode_times`,
83+
- `backend` - replaced by the `parser` kwarg,
84+
- `virtual_backend_kwargs` - replaced by arguments to the `parser` instance,
85+
- `reader_options` - replaced by arguments to the ObjectStore instance,
86+
- `virtual_array_class` - so far has not been needed.
87+
7788
## Missing features
7889

7990
We have worked hard to ensure that nearly all features from VirtualiZarr V1 are available in V2. To our knowledge,

virtualizarr/manifests/store.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import re
4-
from collections.abc import AsyncGenerator, Iterable, Mapping
4+
from collections.abc import AsyncGenerator, Iterable
55
from dataclasses import dataclass
66
from typing import TYPE_CHECKING, Literal, TypeAlias
77
from urllib.parse import urlparse
@@ -282,7 +282,6 @@ def to_virtual_dataset(
282282
group="",
283283
loadable_variables: Iterable[str] | None = None,
284284
decode_times: bool | None = None,
285-
indexes: Mapping[str, xr.Index] | None = None,
286285
) -> "xr.Dataset":
287286
"""
288287
Create a "virtual" [xarray.Dataset][] containing the contents of one zarr group.
@@ -312,7 +311,6 @@ def to_virtual_dataset(
312311
manifest_store=self,
313312
group=group,
314313
loadable_variables=loadable_variables,
315-
indexes=indexes,
316314
decode_times=decode_times,
317315
)
318316

virtualizarr/tests/test_parsers/test_dmrpp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ def test_parse_dataset(group: str | None, warns: bool, netcdf4_file):
462462
with pytest.warns(UserWarning, match=f"ignoring group parameter {group!r}"):
463463
ms = drmpp.parse_dataset(object_store=store, group=group)
464464

465-
vds = ms.to_virtual_dataset(loadable_variables=None, indexes=None)
465+
vds = ms.to_virtual_dataset()
466466

467467
assert vds.sizes == {"lat": 25, "lon": 53, "time": 2920}
468468
assert vds.data_vars.keys() == {"air"}

virtualizarr/tests/test_xarray.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -439,14 +439,6 @@ def test_nbytes(simple_netcdf4, local_registry):
439439

440440

441441
class TestOpenVirtualDatasetIndexes:
442-
@pytest.mark.xfail(reason="not yet implemented")
443-
def test_specify_no_indexes(self, netcdf4_file, local_registry):
444-
parser = HDFParser()
445-
with open_virtual_dataset(
446-
url=netcdf4_file, registry=local_registry, parser=parser, indexes={}
447-
) as vds:
448-
assert vds.indexes == {}
449-
450442
@requires_hdf5plugin
451443
@requires_imagecodecs
452444
def test_create_default_indexes_for_loadable_variables(
@@ -460,7 +452,6 @@ def test_create_default_indexes_for_loadable_variables(
460452
url=netcdf4_file,
461453
registry=local_registry,
462454
parser=parser,
463-
indexes=None,
464455
loadable_variables=loadable_variables,
465456
) as vds,
466457
open_dataset(netcdf4_file, decode_times=True) as ds,

virtualizarr/xarray.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ def open_virtual_dataset(
4141
drop_variables: Iterable[str] | None = None,
4242
loadable_variables: Iterable[str] | None = None,
4343
decode_times: bool | None = None,
44-
indexes: Mapping[str, xr.Index] | None = None,
4544
) -> xr.Dataset:
4645
"""
4746
Open an archival data source as an [xarray.Dataset][] wrapping virtualized zarr arrays.
@@ -76,10 +75,6 @@ def open_virtual_dataset(
7675
Variables in the data source to load as Dask/NumPy arrays instead of as virtual arrays.
7776
decode_times
7877
Bool that is passed into [xarray.open_dataset][]. Allows time to be decoded into a datetime object.
79-
indexes
80-
Indexes to use on the returned [xarray.Dataset][].
81-
Default will read any 1D coordinate data to create in-memory Pandas indexes.
82-
To avoid creating any indexes, pass `indexes={}`.
8378
8479
Returns
8580
-------
@@ -97,7 +92,6 @@ def open_virtual_dataset(
9792
ds = manifest_store.to_virtual_dataset(
9893
loadable_variables=loadable_variables,
9994
decode_times=decode_times,
100-
indexes=indexes,
10195
)
10296
return ds.drop_vars(list(drop_variables or ()))
10397

@@ -329,7 +323,6 @@ def construct_virtual_dataset(
329323
group: str | None = None,
330324
loadable_variables: Iterable[Hashable] | None = None,
331325
decode_times: bool | None = None,
332-
indexes: Mapping[str, xr.Index] | None = None,
333326
reader_options: Optional[dict] = None,
334327
) -> xr.Dataset:
335328
"""
@@ -338,9 +331,6 @@ def construct_virtual_dataset(
338331
339332
"""
340333

341-
if indexes is not None:
342-
raise NotImplementedError()
343-
344334
if group:
345335
raise NotImplementedError("ManifestStore does not yet support nested groups")
346336
else:

0 commit comments

Comments
 (0)