Skip to content

Cannot open Zarr v3 datasets with scalar variables #895

@larsbuntemeyer

Description

@larsbuntemeyer

Hi all, I was trying to open Zarr v3 datasets with scalar coordinates (e.g., a height coordinate). After peeking into the tests, I was able to construct a minimal example of the problem:

from virtualizarr.parsers import ZarrParser
from virtualizarr import open_virtual_dataset
from obspec_utils.registry import ObjectStoreRegistry
from obstore.store import LocalStore
import xarray as xr

# Create a small Dataset with a scalar
ds = xr.Dataset(
    {"data": 42.0},
)

filepath = "/tmp/scalar.zarr"

ds.to_zarr(
    filepath,
    consolidated=False,
    zarr_format=3,
    mode="w",
)

store = LocalStore(prefix=filepath)
registry = ObjectStoreRegistry({f"file://{filepath}": store})
parser = ZarrParser()

vds = open_virtual_dataset(
    url=filepath,
    parser=parser,
    registry=registry,
)

This fails with `TypeError: 'NoneType' object is not iterable` inside xarray (in `NamedArray._parse_dimensions`, see the full traceback below) because `dims` is `None`. Note that this works fine with `zarr_format=2`, or if I add a dimension, e.g., like this:

ds = xr.Dataset(
    {"data": (("x",), [42.0])},
)
Full traceback
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[18], [line 28](vscode-notebook-cell:?execution_count=18&line=28)
     25 registry = ObjectStoreRegistry({f"file://{filepath}": store})
     26 parser = ZarrParser()
---> [28](vscode-notebook-cell:?execution_count=18&line=28) vds = open_virtual_dataset(
     29   url=filepath,
     30   parser=parser,
     31   registry=registry,
     32 )

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/xarray.py:228, in open_virtual_dataset(url, registry, parser, drop_variables, loadable_variables, decode_times)
    221 filepath = validate_and_normalize_path_to_uri(url, fs_root=Path.cwd().as_uri())
    223 manifest_store = parser(
    224     url=filepath,
    225     registry=registry,
    226 )
--> [228](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/xarray.py:228) ds = manifest_store.to_virtual_dataset(
    229     loadable_variables=loadable_variables,
    230     decode_times=decode_times,
    231 )
    232 return ds.drop_vars(list(drop_variables or ()))

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/store.py:328, in ManifestStore.to_virtual_dataset(self, group, loadable_variables, decode_times)
    323 if loadable_variables and self._registry.map is None:
    324     raise ValueError(
    325         f"ManifestStore contains an empty store registry, but {loadable_variables} were provided as loadable variables. Must provide an ObjectStore instance in order to load variables."
    326     )
--> [328](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/store.py:328) return construct_virtual_dataset(
    329     manifest_store=self,
    330     group=group,
    331     loadable_variables=loadable_variables,
    332     decode_times=decode_times,
    333 )

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/xarray.py:477, in construct_virtual_dataset(manifest_store, group, loadable_variables, decode_times, reader_options)
    474 else:
    475     manifestgroup = manifest_store._group
--> [477](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/xarray.py:477) fully_virtual_ds = manifestgroup.to_virtual_dataset()
    479 with xr.open_zarr(
    480     manifest_store,
    481     group=group,
   (...)    485     decode_times=decode_times,
    486 ) as loadable_ds:
    487     return replace_virtual_with_loadable_vars(
    488         fully_virtual_ds, loadable_ds, loadable_variables
    489     )

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/group.py:120, in ManifestGroup.to_virtual_dataset(self)
    116 attributes = metadata_dict["attributes"]
    117 coord_names = attributes.pop("coordinates", [])
    119 virtual_vars = {
--> [120](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/group.py:120)     name: marr.to_virtual_variable() for name, marr in self.arrays.items()
    121 }
    123 return construct_fully_virtual_dataset(
    124     virtual_vars=virtual_vars,
    125     coord_names=coord_names,
    126     attrs=attributes,
    127 )

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/array.py:292, in ManifestArray.to_virtual_variable(self)
    285 stripped_metadata = utils.copy_and_replace_metadata(
    286     self.metadata, new_dimension_names=None, new_attributes={}
    287 )
    288 stripped_marr = ManifestArray(
    289     chunkmanifest=self.manifest, metadata=stripped_metadata
    290 )
--> [292](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/virtualizarr/manifests/array.py:292) return xr.Variable(
    293     data=stripped_marr,
    294     dims=dims,
    295     attrs=attrs,
    296 )

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/core/variable.py:399, in Variable.__init__(self, dims, data, attrs, encoding, fastpath)
    371 def __init__(
    372     self,
    373     dims,
   (...)    377     fastpath=False,
    378 ):
    379     """
    380     Parameters
    381     ----------
   (...)    397         unrecognized encoding items.
    398     """
--> [399](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/core/variable.py:399)     super().__init__(
    400         dims=dims, data=as_compatible_data(data, fastpath=fastpath), attrs=attrs
    401     )
    403     self._encoding: dict[Any, Any] | None = None
    404     if encoding is not None:

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/namedarray/core.py:261, in NamedArray.__init__(self, dims, data, attrs)
    254 def __init__(
    255     self,
    256     dims: _DimsLike,
    257     data: duckarray[Any, _DType_co],
    258     attrs: _AttrsLike = None,
    259 ):
    260     self._data = data
--> [261](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/namedarray/core.py:261)     self._dims = self._parse_dimensions(dims)
    262     self._attrs = dict(attrs) if attrs else None

File /mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/namedarray/core.py:503, in NamedArray._parse_dimensions(self, dims)
    502 def _parse_dimensions(self, dims: _DimsLike) -> _Dims:
--> [503](https://vscode-remote+ssh-002dremote-002bjsc-002dcordex.vscode-resource.vscode-cdn.net/mnt/CORDEX_CMIP6_tmp/user_tmp/lbuntemeyer/conda_envs/cordex-etl/lib/python3.13/site-packages/xarray/namedarray/core.py:503)     dims = (dims,) if isinstance(dims, str) else tuple(dims)
    504     if len(dims) != self.ndim:
    505         raise ValueError(
    506             f"dimensions {dims} must have the same length as the "
    507             f"number of data dimensions, ndim={self.ndim}"
    508         )

TypeError: 'NoneType' object is not iterable

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions