Skip to content

Commit 661ae45

Browse files
authored
Minor improvements to dmrpp parser (#757)
* improve parser handling of container attrs and scalar vars * add simple tests * remove old commented code * add warning to skip variable parsing rather than fail * adds info to notes * remove commented debug code
1 parent 7638893 commit 661ae45

File tree

4 files changed

+93
-8
lines changed

4 files changed

+93
-8
lines changed

docs/releases.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ Extremely minor release to ensure compatibility with the soon-to-be released ver
2121
- Adjust for minor upcoming change in private xarray API `xarray.structure.combine._nested_combine`.
2222
([#779](https://github.com/zarr-developers/VirtualiZarr/pull/779)).
2323
By [Tom Nicholas](https://github.com/TomNicholas).
24+
- Enable `DMRPParser` to process scalar, dimensionless variables that lack chunks.
25+
([#666](https://github.com/zarr-developers/VirtualiZarr/pull/757)).
26+
By [Miguel Jimenez-Urias](https://github.com/Mikejmnez).
27+
- Enable `DMRPParser` to parse flattened dmrpp metadata reference files, which contain container attributes.
28+
([#581](https://github.com/zarr-developers/VirtualiZarr/pull/757)).
29+
By [Miguel Jimenez-Urias](https://github.com/Mikejmnez).
2430

2531
## v2.1.0 (14th August 2025)
2632

virtualizarr/parsers/dmrpp.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -277,8 +277,16 @@ def _parse_dataset(
277277
manifest_dict: dict[str, ManifestArray] = {}
278278
for var_tag in self._find_var_tags(root):
279279
if var_tag.attrib["name"] not in self.skip_variables:
280-
variable = self._parse_variable(var_tag)
281-
manifest_dict[var_tag.attrib["name"]] = variable
280+
try:
281+
variable = self._parse_variable(var_tag)
282+
manifest_dict[var_tag.attrib["name"]] = variable
283+
except (UnboundLocalError, ValueError):
284+
name = var_tag.attrib["name"]
285+
warnings.warn(
286+
f"This DMRpp contains the variable {name} that could not"
287+
" be parsed. Consider adding it to the list of skipped "
288+
"variables, or opening an issue to help resolve this"
289+
)
282290

283291
# Attributes
284292
attrs: dict[str, str] = {}
@@ -381,8 +389,6 @@ def _parse_variable(self, var_tag: ET.Element) -> ManifestArray:
381389
# Dimension info
382390
dims: dict[str, int] = {}
383391
dimension_tags = self._find_dimension_tags(var_tag)
384-
if not dimension_tags:
385-
raise ValueError("Variable has no dimensions")
386392
for dim in dimension_tags:
387393
dims.update(self._parse_dim(dim))
388394
# convert DAP dtype to numpy dtype
@@ -407,9 +413,13 @@ def _parse_variable(self, var_tag: ET.Element) -> ManifestArray:
407413
if "fillValue" in chunks_tag.attrib:
408414
fillValue_attrib = chunks_tag.attrib["fillValue"]
409415
array_fill_value = np.array(fillValue_attrib).astype(dtype)[()]
410-
chunkmanifest = self._parse_chunks(chunks_tag, chunks_shape)
416+
if chunks_shape:
417+
chunkmanifest = self._parse_chunks(chunks_tag, chunks_shape)
418+
else:
419+
chunkmanifest = ChunkManifest(entries={}, shape=array_fill_value.shape)
411420
# Filters
412421
codecs = self._parse_filters(chunks_tag, dtype)
422+
413423
# Attributes
414424
attrs: dict[str, Any] = {}
415425
for attr_tag in var_tag.iterfind("dap:Attribute", self._NS):
@@ -449,9 +459,14 @@ def _parse_attribute(self, attr_tag: ET.Element) -> dict[str, Any]:
449459
# DMR++ build information that is not part of the dataset
450460
if attr_tag.attrib["name"] == "build_dmrpp_metadata":
451461
return {}
452-
raise ValueError(
453-
"Nested attributes cannot be assigned to a variable or dataset"
454-
)
462+
else:
463+
container_attr = attr_tag.attrib["name"]
464+
warnings.warn(
465+
"This DMRpp contains a nested attribute "
466+
f"{container_attr}. Nested attributes cannot "
467+
"be assigned to a variable or dataset and will be dropped"
468+
)
469+
return {}
455470
dtype = np.dtype(self._DAP_NP_DTYPE[attr_tag.attrib["type"]])
456471
# if multiple Value tags are present, store as "key": "[v1, v2, ...]"
457472
for value_tag in attr_tag:

virtualizarr/tests/test_parsers/conftest.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import pytest
77
import xarray as xr
8+
from netCDF4 import Dataset
89
from packaging.version import Version
910
from xarray.tests.test_dataset import create_test_data
1011
from xarray.util.print_versions import netcdf_and_hdf5_versions
@@ -41,6 +42,16 @@ def no_chunks_hdf5_url(tmpdir):
4142
return f"file://{filepath}"
4243

4344

45+
@pytest.fixture
46+
def fill_value_scalar_no_chunks_nc4_url(tmpdir):
47+
filepath = f"{tmpdir}/fill_value_scalar_no_chunks.nc4"
48+
f = Dataset(filepath, "w")
49+
f.createVariable("data", "<i4", fill_value=-999)
50+
f.long_name = "empty scalar data"
51+
f.close()
52+
return f"file://{filepath}"
53+
54+
4455
@pytest.fixture
4556
def chunked_hdf5_url(tmpdir):
4657
filepath = f"{tmpdir}/chunks.nc"

virtualizarr/tests/test_parsers/test_dmrpp.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,47 @@
337337
</Dataset>
338338
"""
339339
),
340+
"fill_value_scalar_no_chunks_nc4_url": textwrap.dedent(
341+
"""\
342+
<?xml version="1.0" encoding="ISO-8859-1"?>
343+
<Dataset xmlns="http://xml.opendap.org/ns/DAP/4.0#" xmlns:dmrpp="http://xml.opendap.org/dap/dmrpp/1.0.0#" dapVersion="4.0" dmrVersion="1.0" name="fill_value_scalar_no_chunks.nc4" dmrpp:href="OPeNDAP_DMRpp_DATA_ACCESS_URL" dmrpp:version="3.21.1-477">
344+
<Int32 name="data">
345+
<Attribute name="_FillValue" type="Int32">
346+
<Value>-999</Value>
347+
</Attribute>
348+
<dmrpp:chunks fillValue="-999"/>
349+
</Int32>
350+
<Attribute name="long_name" type="String">
351+
<Value>empty scalar data</Value>
352+
</Attribute>
353+
<Attribute name="drop_container_attribute" type="Container">
354+
<Attribute name="created" type="String">
355+
<Value>2025-08-14T23:32:01Z</Value>
356+
</Attribute>
357+
<Attribute name="reason" type="String">
358+
<Value>container attributes are no longer supported</Value>
359+
</Attribute>
360+
</Attribute>
361+
<Attribute name="build_dmrpp_metadata" type="Container">
362+
<Attribute name="created" type="String">
363+
<Value>2025-08-14T23:32:01Z</Value>
364+
</Attribute>
365+
<Attribute name="build_dmrpp" type="String">
366+
<Value>3.21.1-477</Value>
367+
</Attribute>
368+
<Attribute name="bes" type="String">
369+
<Value>3.21.1-477</Value>
370+
</Attribute>
371+
<Attribute name="libdap" type="String">
372+
<Value>libdap-3.21.1-222</Value>
373+
</Attribute>
374+
<Attribute name="invocation" type="String">
375+
<Value>build_dmrpp -f /usr/share/hyrax/fill_value_scalar_no_chunks.nc4 -r fill_value_scalar_no_chunks.nc4.dmr -u OPeNDAP_DMRpp_DATA_ACCESS_URL -M</Value>
376+
</Attribute>
377+
</Attribute>
378+
</Dataset>
379+
"""
380+
),
340381
}
341382

342383

@@ -540,3 +581,15 @@ def test_parse_attribute(netcdf4_file, attr_path, expected):
540581

541582
result = parser._parse_attribute(parser.find_node_fqn(attr_path))
542583
assert result == expected
584+
585+
586+
def test_empty_scalar_warns_container(fill_value_scalar_no_chunks_nc4_url):
587+
parsed_dmrpp = dmrparser(
588+
DMRPP_XML_STRINGS["fill_value_scalar_no_chunks_nc4_url"],
589+
filepath=fill_value_scalar_no_chunks_nc4_url,
590+
)
591+
store = obstore_local(url=f"file://{parsed_dmrpp.data_filepath}")
592+
with pytest.warns(UserWarning):
593+
parsed_vds = parsed_dmrpp.parse_dataset(object_store=store)
594+
vds_g1 = parsed_vds.to_virtual_dataset()
595+
assert vds_g1["data"].attrs == {"_FillValue": -999}

0 commit comments

Comments
 (0)