diff --git a/docs/migration_guide.md b/docs/migration_guide.md index 0cce39d6b..a4e58ca46 100644 --- a/docs/migration_guide.md +++ b/docs/migration_guide.md @@ -3,11 +3,11 @@ VirtualiZarr V2 includes breaking changes and other conceptual differences relative to V1. The goal of this guide is to provide some context around the core changes and demonstrate the updated usage. -## Breaking changes +## Breaking API changes in `open_virtual_dataset` -### Open_virtual_dataset +### Filetype identification, parsers, and stores -In V1 there was a lot of auto-magic guesswork of filetypes and urls happening under the hood. +In V1 there was a lot of auto-magic guesswork of filetypes, urls, and types of remote storage happening under the hood. While this made it easy to get started, it could lead to a lot of foot-guns and unexpected behavior. For example, the following V1-style usage would guess that your data is in a NetCDF file format and that your data @@ -74,6 +74,17 @@ for reading the original data, but some parsers may accept an empty [ObjectStore ) ``` +### Deprecation of other kwargs + +We have removed some keyword arguments to `open_virtual_dataset` that were deprecated, saw little use, or are now redundant. Specifically: + +- `indexes` - there is little need to control this separately from `loadable_variables`, +- `cftime_variables` - this argument is deprecated upstream in favor of `decode_times`, +- `backend` - replaced by the `parser` kwarg, +- `virtual_backend_kwargs` - replaced by arguments to the `parser` instance, +- `reader_options` - replaced by arguments to the ObjectStore instance, +- `virtual_array_class` - so far has not been needed. + ## Missing features We have worked hard to ensure that nearly all features from VirtualiZarr V1 are available in V2. 
To our knowledge, diff --git a/virtualizarr/manifests/store.py b/virtualizarr/manifests/store.py index 4366185bb..8ce504e21 100644 --- a/virtualizarr/manifests/store.py +++ b/virtualizarr/manifests/store.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from collections.abc import AsyncGenerator, Iterable, Mapping +from collections.abc import AsyncGenerator, Iterable from dataclasses import dataclass from typing import TYPE_CHECKING, Literal, TypeAlias from urllib.parse import urlparse @@ -282,7 +282,6 @@ def to_virtual_dataset( group="", loadable_variables: Iterable[str] | None = None, decode_times: bool | None = None, - indexes: Mapping[str, xr.Index] | None = None, ) -> "xr.Dataset": """ Create a "virtual" [xarray.Dataset][] containing the contents of one zarr group. @@ -312,7 +311,6 @@ def to_virtual_dataset( manifest_store=self, group=group, loadable_variables=loadable_variables, - indexes=indexes, decode_times=decode_times, ) diff --git a/virtualizarr/tests/test_parsers/test_dmrpp.py b/virtualizarr/tests/test_parsers/test_dmrpp.py index 857b3413d..e6abdcab9 100644 --- a/virtualizarr/tests/test_parsers/test_dmrpp.py +++ b/virtualizarr/tests/test_parsers/test_dmrpp.py @@ -462,7 +462,7 @@ def test_parse_dataset(group: str | None, warns: bool, netcdf4_file): with pytest.warns(UserWarning, match=f"ignoring group parameter {group!r}"): ms = drmpp.parse_dataset(object_store=store, group=group) - vds = ms.to_virtual_dataset(loadable_variables=None, indexes=None) + vds = ms.to_virtual_dataset() assert vds.sizes == {"lat": 25, "lon": 53, "time": 2920} assert vds.data_vars.keys() == {"air"} diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index 00e964ae4..a1b6c2671 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -439,14 +439,6 @@ def test_nbytes(simple_netcdf4, local_registry): class TestOpenVirtualDatasetIndexes: - @pytest.mark.xfail(reason="not yet implemented") - def 
test_specify_no_indexes(self, netcdf4_file, local_registry): - parser = HDFParser() - with open_virtual_dataset( - url=netcdf4_file, registry=local_registry, parser=parser, indexes={} - ) as vds: - assert vds.indexes == {} - @requires_hdf5plugin @requires_imagecodecs def test_create_default_indexes_for_loadable_variables( @@ -460,7 +452,6 @@ def test_create_default_indexes_for_loadable_variables( url=netcdf4_file, registry=local_registry, parser=parser, - indexes=None, loadable_variables=loadable_variables, ) as vds, open_dataset(netcdf4_file, decode_times=True) as ds, diff --git a/virtualizarr/xarray.py b/virtualizarr/xarray.py index 3a65d99e6..52a785740 100644 --- a/virtualizarr/xarray.py +++ b/virtualizarr/xarray.py @@ -41,7 +41,6 @@ def open_virtual_dataset( drop_variables: Iterable[str] | None = None, loadable_variables: Iterable[str] | None = None, decode_times: bool | None = None, - indexes: Mapping[str, xr.Index] | None = None, ) -> xr.Dataset: """ Open an archival data source as an [xarray.Dataset][] wrapping virtualized zarr arrays. @@ -76,10 +75,6 @@ def open_virtual_dataset( Variables in the data source to load as Dask/NumPy arrays instead of as virtual arrays. decode_times Bool that is passed into [xarray.open_dataset][]. Allows time to be decoded into a datetime object. - indexes - Indexes to use on the returned [xarray.Dataset][]. - Default will read any 1D coordinate data to create in-memory Pandas indexes. - To avoid creating any indexes, pass `indexes={}`. 
Returns ------- @@ -97,7 +92,6 @@ def open_virtual_dataset( ds = manifest_store.to_virtual_dataset( loadable_variables=loadable_variables, decode_times=decode_times, - indexes=indexes, ) return ds.drop_vars(list(drop_variables or ())) @@ -329,7 +323,6 @@ def construct_virtual_dataset( group: str | None = None, loadable_variables: Iterable[Hashable] | None = None, decode_times: bool | None = None, - indexes: Mapping[str, xr.Index] | None = None, reader_options: Optional[dict] = None, ) -> xr.Dataset: """ @@ -338,9 +331,6 @@ def construct_virtual_dataset( """ - if indexes is not None: - raise NotImplementedError() - if group: raise NotImplementedError("ManifestStore does not yet support nested groups") else: