Skip to content

Commit 4b5efe4

Browse files
committed
test: expand builder encoding coverage
1 parent 8c7a346 commit 4b5efe4

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

src/mdio/core/v1/builder.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,10 @@ def _generate_encodings() -> dict:
284284
chunks = None
285285
if var.metadata is not None and var.metadata.chunk_grid is not None:
286286
chunks = var.metadata.chunk_grid.configuration.chunk_shape
287+
if isinstance(chunks[0], list):
288+
chunks = tuple(tuple(c) for c in chunks)
289+
else:
290+
chunks = tuple(chunks)
287291
else:
288292
# When no chunk_grid is provided, set chunks to shape to avoid chunking
289293
dim_names = [d.name if isinstance(d, NamedDimension) else d for d in var.dimensions]
@@ -298,14 +302,19 @@ def _generate_encodings() -> dict:
298302
}
299303
return global_encodings
300304

305+
global_encodings = _generate_encodings()
301306
ds.to_mdio(
302307
store,
303308
mode=mode,
304309
zarr_format=2,
305310
consolidated=True,
306311
safe_chunks=False,
307312
compute=compute,
308-
encoding=_generate_encodings(),
313+
encoding=global_encodings,
309314
**kwargs,
310315
)
316+
317+
for var_name, encoding in global_encodings.items():
318+
ds[var_name].encoding.update(encoding)
319+
311320
return ds
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import numpy as np
2+
from pathlib import Path
3+
4+
from mdio.core.v1.builder import MDIODatasetBuilder
5+
6+
7+
def test_builder_populates_encodings(tmp_path: Path) -> None:
    """Check that ``to_mdio`` leaves chunk encodings on the returned dataset.

    Builds a dataset with one dimension ``x`` of size 10 and one variable
    ``var`` whose metadata requests a regular chunk grid of shape ``[5]``,
    writes it under ``tmp_path``, and asserts the ``"chunks"`` entry of each
    variable's ``encoding``.
    """
    chunk_grid = {"name": "regular", "configuration": {"chunkShape": [5]}}

    builder = MDIODatasetBuilder("encodings")
    builder.add_dimension("x", 10)
    builder.add_variable("var", metadata={"chunkGrid": chunk_grid})

    store_path = tmp_path / "enc.mdio"
    ds = builder.to_mdio(str(store_path))

    # "x" has no explicit chunk grid and is expected to be one full-size
    # chunk; "var" is expected to carry the chunk shape from its metadata.
    expected_chunks = {
        "x": (10,),
        "var": (5,),
    }
    for var_name, chunks in expected_chunks.items():
        assert ds[var_name].encoding["chunks"] == chunks
18+
19+
20+
def test_builder_populates_encodings_multiple(tmp_path: Path) -> None:
    """Check chunk encodings on a dataset with several dims, coords and variables.

    Three dimensions, two coordinates and three explicitly chunked variables
    are added to the builder; after ``to_mdio`` every entry of the returned
    dataset must expose the expected ``encoding["chunks"]`` tuple.
    """

    def regular_grid(*chunk_shape: int) -> dict:
        # Metadata payload requesting a regular chunk grid of the given shape.
        return {
            "chunkGrid": {
                "name": "regular",
                "configuration": {"chunkShape": list(chunk_shape)},
            }
        }

    builder = MDIODatasetBuilder("encodings_multi")

    # Dimensions, registered in x, y, z order.
    for dim_name, dim_size in (("x", 10), ("y", 20), ("z", 5)):
        builder.add_dimension(dim_name, dim_size)

    # Coordinates spanning one and two dimensions.
    builder.add_coordinate("x_coord", dimensions=["x"])
    builder.add_coordinate("yz_coord", dimensions=["y", "z"])

    # Data variables with explicit chunk shapes.
    builder.add_variable("var_xy", dimensions=["x", "y"], metadata=regular_grid(5, 10))
    builder.add_variable("var_xyz", dimensions=["x", "y", "z"], metadata=regular_grid(5, 10, 5))
    builder.add_variable("var_z", dimensions=["z"], metadata=regular_grid(5))

    store_path = tmp_path / "enc_multi.mdio"
    ds = builder.to_mdio(str(store_path))

    expected_chunks = {
        # Dimension variables should have full-size chunks.
        "x": (10,),
        "y": (20,),
        "z": (5,),
        # Coordinate variables should inherit dimension chunking.
        "x_coord": (10,),
        "yz_coord": (20, 5),
        # Data variables should use the provided chunk shapes.
        "var_xy": (5, 10),
        "var_xyz": (5, 10, 5),
        "var_z": (5,),
    }
    for var_name, chunks in expected_chunks.items():
        assert ds[var_name].encoding["chunks"] == chunks

0 commit comments

Comments
 (0)