Skip to content

dmrpp parsing fails for variables without a dim #666

@ebolch

Description

@ebolch

I'm trying to open some Daymet data with earthaccess.open_virtual_dataset using the following workflow:

import earthaccess
import xarray as xr

earthaccess.login(persist=True)

results = earthaccess.search_data(
    concept_id='C2532426483-ORNL_CLOUD',
    count=10
)

ds = earthaccess.open_virtual_dataset(results[0],
     access="direct",
     load=False)

and getting this error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[18], line 1
----> 1 ds = earthaccess.open_virtual_dataset(results[0], **open_options)
      2 ds

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/earthaccess/dmrpp_zarr.py:193, in open_virtual_dataset(granule, group, access, load)
    147 def open_virtual_dataset(
    148     granule: earthaccess.DataGranule,
    149     group: str | None = None,
    150     access: str = "indirect",
    151     load: bool = False,
    152 ) -> xr.Dataset:
    153     """Open a granule as a single virtual xarray Dataset.
    154 
    155     Uses NASA DMR++ metadata files to create a virtual xarray dataset with ManifestArrays. This virtual dataset can be used to create zarr reference files. See [https://virtualizarr.readthedocs.io](https://virtualizarr.readthedocs.io) for more information on virtual xarray datasets.
   (...)    191         ```
    192     """
--> 193     return open_virtual_mfdataset(
    194         granules=[granule],
    195         group=group,
    196         access=access,
    197         load=load,
    198         parallel=False,
    199         preprocess=None,
    200     )

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/earthaccess/dmrpp_zarr.py:112, in open_virtual_mfdataset(granules, group, access, load, preprocess, parallel, **xr_combine_nested_kwargs)
    109 # Get list of virtual datasets (or dask delayed objects)
    110 for g in granules:
    111     vdatasets.append(
--> 112         open_(
    113             filepath=g.data_links(access=access)[0] + ".dmrpp",
    114             filetype="dmrpp",  # type: ignore
    115             group=group,
    116             indexes={},
    117             reader_options={"storage_options": fs.storage_options},
    118         )
    119     )
    120 if preprocess is not None:
    121     vdatasets = [preprocess(ds) for ds in vdatasets]

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/virtualizarr/backend.py:200, in open_virtual_dataset(filepath, filetype, group, drop_variables, loadable_variables, decode_times, cftime_variables, indexes, virtual_array_class, virtual_backend_kwargs, reader_options, backend)
    197 if backend_cls is None:
    198     raise NotImplementedError(f"Unsupported file type: {filetype.name}")
--> 200 vds = backend_cls.open_virtual_dataset(
    201     filepath,
    202     group=group,
    203     drop_variables=drop_variables,
    204     loadable_variables=loadable_variables,
    205     decode_times=decode_times,
    206     indexes=indexes,
    207     virtual_backend_kwargs=virtual_backend_kwargs,
    208     reader_options=reader_options,
    209 )
    211 return vds

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/virtualizarr/readers/dmrpp.py:57, in DMRPPVirtualBackend.open_virtual_dataset(filepath, group, drop_variables, loadable_variables, decode_times, indexes, virtual_backend_kwargs, reader_options)
     49 fpath = _FsspecFSFromFilepath(
     50     filepath=filepath, reader_options=reader_options
     51 ).open_file()
     53 parser = DMRParser(
     54     root=ET.parse(fpath).getroot(),
     55     data_filepath=filepath.removesuffix(".dmrpp"),
     56 )
---> 57 vds = parser.parse_dataset(group=group, indexes=indexes)
     59 return vds.drop_vars(drop_variables)

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/virtualizarr/readers/dmrpp.py:172, in DMRParser.parse_dataset(self, group, indexes)
    170         else:
    171             raise ValueError(f"Group {group} not found in DMR++ file")
--> 172 return self._parse_dataset(self.root, indexes)

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/virtualizarr/readers/dmrpp.py:276, in DMRParser._parse_dataset(self, root, indexes)
    274 data_vars: dict[str, Variable] = {}
    275 for var_tag in self._find_var_tags(root):
--> 276     variable = self._parse_variable(var_tag)
    277     # Either coordinates are explicitly defined or 1d variable with same name as dimension is a coordinate
    278     if var_tag.attrib["name"] in coord_names or (
    279         len(variable.dims) == 1 and variable.dims[0] == var_tag.attrib["name"]
    280     ):

File /home/conda/ebolch@contractor.usgs.gov/envs/ebolch@contractor.usgs.gov-lpdaac/lib/python3.13/site-packages/virtualizarr/readers/dmrpp.py:385, in DMRParser._parse_variable(self, var_tag)
    383 dimension_tags = self._find_dimension_tags(var_tag)
    384 if not dimension_tags:
--> 385     raise ValueError("Variable has no dimensions")
    386 for dim in dimension_tags:
    387     dims.update(self._parse_dim(dim))

ValueError: Variable has no dimensions

I'm guessing this is caused by the variable `lambert_conformal_conic` in the DMR++ file, which has no dimensions. Is there a way for the DMR++ parser to handle (or skip) dimensionless variables like this instead of raising an error? I think there are probably other datasets that store the CRS used for grid mapping as a dimensionless variable, as in this example.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions