Skip to content

Commit 7d8bc80

Browse files
committed
ensure fill value is correct for structured arrays
1 parent 1388264 commit 7d8bc80

File tree

1 file changed: 4 additions & 14 deletions (+4, -14 lines changed)

1 file changed

+4
-14
lines changed

src/mdio/schemas/v1/dataset_serializer.py

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -149,17 +149,12 @@ def _convert_compressor(
149149

150150

151151
def _get_fill_value(data_type: ScalarType | StructuredType | str) -> any:
152-
"""Get the fill value for a given data type.
153-
154-
The Zarr fill_value is a scalar value providing the default value to use for
155-
uninitialized portions of the array, or null if no fill_value is to be used
156-
https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html
157-
"""
152+
"""Get the fill value for a given data type."""
158153
if isinstance(data_type, ScalarType):
159154
return fill_value_map.get(data_type)
160155
if isinstance(data_type, StructuredType):
161-
d_type = to_numpy_dtype(data_type)
162-
return np.zeros((), dtype=d_type)
156+
numpy_dtype = to_numpy_dtype(data_type)
157+
return np.void((), dtype=numpy_dtype)
163158
if isinstance(data_type, str):
164159
return ""
165160
# If we do not have a fill value for this type, use None
@@ -182,9 +177,6 @@ def to_xarray_dataset(mdio_ds: Dataset) -> xr_Dataset: # noqa: PLR0912
182177
Returns:
183178
The constructed dataset with proper MDIO structure and metadata.
184179
"""
185-
# See the xarray tutorial for more details on how to create datasets:
186-
# https://tutorial.xarray.dev/fundamentals/01.1_creating_data_structures.html
187-
188180
all_named_dims = _get_all_named_dimensions(mdio_ds)
189181

190182
# First pass: Build all variables
@@ -215,10 +207,8 @@ def to_xarray_dataset(mdio_ds: Dataset) -> xr_Dataset: # noqa: PLR0912
215207
encoding = {
216208
"chunks": chunks,
217209
"compressor": _convert_compressor(v.compressor),
210+
"fill_value": _get_fill_value(v.data_type),
218211
}
219-
# NumPy structured data types have fields attribute, while scalar types do not.
220-
if not hasattr(v.data_type, "fields"):
221-
encoding["fill_value"] = _get_fill_value(v.data_type)
222212

223213
data_array.encoding = encoding
224214

0 commit comments

Comments
 (0)