Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions docs/migration_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
VirtualiZarr V2 includes breaking changes and other conceptual differences relative to V1. The goal of this guide
is to provide some context around the core changes and demonstrate the updated usage.

## Breaking changes
## Breaking API changes in `open_virtual_dataset`

### Open_virtual_dataset
### Filetype identification, parsers, and stores

In V1 there was a lot of auto-magic guesswork of filetypes and urls happening under the hood.
In V1 there was a lot of auto-magic guesswork of filetypes, URLs, and types of remote storage happening under the hood.
While this made it easy to get started, it could lead to a lot of foot-guns and unexpected behavior.

For example, the following V1-style usage would guess that your data is in a NetCDF file format and that your data
Expand Down Expand Up @@ -74,6 +74,17 @@ for reading the original data, but some parsers may accept an empty [ObjectStore
)
```

### Deprecation of other kwargs

We have removed some keyword arguments to `open_virtual_dataset` that were deprecated, saw little use, or are now redundant. Specifically:

- `indexes` - there is little need to control this separately from `loadable_variables`,
- `cftime_variables` - this argument is deprecated upstream in favor of `decode_times`,
- `backend` - replaced by the `parser` kwarg,
- `virtual_backend_kwargs` - replaced by arguments to the `parser` instance,
- `reader_options` - replaced by arguments to the ObjectStore instance,
- `virtual_array_class` - so far has not been needed.

## Missing features

We have worked hard to ensure that nearly all features from VirtualiZarr V1 are available in V2. To our knowledge,
Expand Down
4 changes: 1 addition & 3 deletions virtualizarr/manifests/store.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import re
from collections.abc import AsyncGenerator, Iterable, Mapping
from collections.abc import AsyncGenerator, Iterable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Literal, TypeAlias
from urllib.parse import urlparse
Expand Down Expand Up @@ -282,7 +282,6 @@ def to_virtual_dataset(
group="",
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, xr.Index] | None = None,
) -> "xr.Dataset":
"""
Create a "virtual" [xarray.Dataset][] containing the contents of one zarr group.
Expand Down Expand Up @@ -312,7 +311,6 @@ def to_virtual_dataset(
manifest_store=self,
group=group,
loadable_variables=loadable_variables,
indexes=indexes,
decode_times=decode_times,
)

Expand Down
2 changes: 1 addition & 1 deletion virtualizarr/tests/test_parsers/test_dmrpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ def test_parse_dataset(group: str | None, warns: bool, netcdf4_file):
with pytest.warns(UserWarning, match=f"ignoring group parameter {group!r}"):
ms = drmpp.parse_dataset(object_store=store, group=group)

vds = ms.to_virtual_dataset(loadable_variables=None, indexes=None)
vds = ms.to_virtual_dataset()

assert vds.sizes == {"lat": 25, "lon": 53, "time": 2920}
assert vds.data_vars.keys() == {"air"}
Expand Down
9 changes: 0 additions & 9 deletions virtualizarr/tests/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,14 +439,6 @@ def test_nbytes(simple_netcdf4, local_registry):


class TestOpenVirtualDatasetIndexes:
@pytest.mark.xfail(reason="not yet implemented")
def test_specify_no_indexes(self, netcdf4_file, local_registry):
parser = HDFParser()
with open_virtual_dataset(
url=netcdf4_file, registry=local_registry, parser=parser, indexes={}
) as vds:
assert vds.indexes == {}

@requires_hdf5plugin
@requires_imagecodecs
def test_create_default_indexes_for_loadable_variables(
Expand All @@ -460,7 +452,6 @@ def test_create_default_indexes_for_loadable_variables(
url=netcdf4_file,
registry=local_registry,
parser=parser,
indexes=None,
loadable_variables=loadable_variables,
) as vds,
open_dataset(netcdf4_file, decode_times=True) as ds,
Expand Down
10 changes: 0 additions & 10 deletions virtualizarr/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ def open_virtual_dataset(
drop_variables: Iterable[str] | None = None,
loadable_variables: Iterable[str] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, xr.Index] | None = None,
) -> xr.Dataset:
"""
Open an archival data source as an [xarray.Dataset][] wrapping virtualized zarr arrays.
Expand Down Expand Up @@ -76,10 +75,6 @@ def open_virtual_dataset(
Variables in the data source to load as Dask/NumPy arrays instead of as virtual arrays.
decode_times
Bool that is passed into [xarray.open_dataset][]. Allows time to be decoded into a datetime object.
indexes
Indexes to use on the returned [xarray.Dataset][].
Default will read any 1D coordinate data to create in-memory Pandas indexes.
To avoid creating any indexes, pass `indexes={}`.

Returns
-------
Expand All @@ -97,7 +92,6 @@ def open_virtual_dataset(
ds = manifest_store.to_virtual_dataset(
loadable_variables=loadable_variables,
decode_times=decode_times,
indexes=indexes,
)
return ds.drop_vars(list(drop_variables or ()))

Expand Down Expand Up @@ -329,7 +323,6 @@ def construct_virtual_dataset(
group: str | None = None,
loadable_variables: Iterable[Hashable] | None = None,
decode_times: bool | None = None,
indexes: Mapping[str, xr.Index] | None = None,
reader_options: Optional[dict] = None,
) -> xr.Dataset:
"""
Expand All @@ -338,9 +331,6 @@ def construct_virtual_dataset(

"""

if indexes is not None:
raise NotImplementedError()

if group:
raise NotImplementedError("ManifestStore does not yet support nested groups")
else:
Expand Down