Skip to content

Commit 9177095

Browse files
committed
Harmonize encoding from h5netcdf with netcdf4
We were trying to figure out why certain compression features were not included in our h5netcdf backend. This PR to h5netcdf (h5netcdf/h5netcdf#307) should provide the filters variable, but I understand that it is too bleeding edge for you all to pin to.
1 parent 18ebe98 commit 9177095

File tree

1 file changed

+44
-36
lines changed

1 file changed

+44
-36
lines changed

xarray/backends/h5netcdf_.py

Lines changed: 44 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -266,41 +266,8 @@ def open_store_variable(self, name, var):
266266
dimensions = var.dimensions
267267
data = indexing.LazilyIndexedArray(H5NetCDFArrayWrapper(name, self))
268268
attrs = _read_attributes(var)
269-
270-
# netCDF4 specific encoding
271-
encoding = {
272-
"chunksizes": var.chunks,
273-
"fletcher32": var.fletcher32,
274-
"shuffle": var.shuffle,
275-
}
276-
if var.chunks:
277-
encoding["preferred_chunks"] = dict(
278-
zip(var.dimensions, var.chunks, strict=True)
279-
)
280-
# Convert h5py-style compression options to NetCDF4-Python
281-
# style, if possible
282-
if var.compression == "gzip":
283-
encoding["zlib"] = True
284-
encoding["complevel"] = var.compression_opts
285-
elif var.compression is not None:
286-
encoding["compression"] = var.compression
287-
encoding["compression_opts"] = var.compression_opts
288-
289-
# save source so __repr__ can detect if it's local or not
290-
encoding["source"] = self._filename
291-
encoding["original_shape"] = data.shape
292-
293-
vlen_dtype = h5py.check_dtype(vlen=var.dtype)
294-
if vlen_dtype is str:
295-
encoding["dtype"] = str
296-
elif vlen_dtype is not None: # pragma: no cover
297-
# xarray doesn't support writing arbitrary vlen dtypes yet.
298-
pass
299-
# just check if datatype is available and create dtype
300-
# this check can be removed if h5netcdf >= 1.4.0 for any environment
301-
elif (datatype := getattr(var, "datatype", None)) and isinstance(
302-
datatype, h5netcdf.core.EnumType
303-
):
269+
encoding: dict[str, Any] = {}
270+
if (datatype := var.datatype) and isinstance(datatype, h5netcdf.core.EnumType):
304271
encoding["dtype"] = np.dtype(
305272
data.dtype,
306273
metadata={
@@ -309,7 +276,48 @@ def open_store_variable(self, name, var):
309276
},
310277
)
311278
else:
312-
encoding["dtype"] = var.dtype
279+
vlen_dtype = h5py.check_dtype(vlen=var.dtype)
280+
if vlen_dtype is str:
281+
encoding["dtype"] = str
282+
elif vlen_dtype is not None: # pragma: no cover
283+
# xarray doesn't support writing arbitrary vlen dtypes yet.
284+
encoding["dtype"] = var.dtype
285+
else:
286+
encoding["dtype"] = var.dtype
287+
288+
if var.chunks:
289+
encoding["contiguous"] = False
290+
encoding["chunksizes"] = var.chunks
291+
encoding["preferred_chunks"] = dict(
292+
zip(var.dimensions, var.chunks, strict=True)
293+
)
294+
else:
295+
encoding["contiguous"] = True
296+
encoding["chunksizes"] = None
297+
298+
# filters only exists in an unreleased version of h5netcdf for now
299+
if hasattr(var, "filters"):
300+
filters = var.filters()
301+
if filters is not None:
302+
encoding.update(filters)
303+
else:
304+
# Continue with the old path before the filters() method existed
305+
encoding |= {
306+
"chunksizes": var.chunks,
307+
"fletcher32": var.fletcher32,
308+
"shuffle": var.shuffle,
309+
}
310+
if var.compression == "gzip":
311+
encoding["zlib"] = True
312+
encoding["complevel"] = var.compression_opts
313+
# I'm pretty sure compression is always None if it is not gzip
314+
# The filters() method returns more information
315+
elif var.compression is not None:
316+
encoding["compression"] = var.compression
317+
encoding["compression_opts"] = var.compression_opts
318+
319+
encoding["source"] = self._filename
320+
encoding["original_shape"] = data.shape
313321

314322
return Variable(dimensions, data, attrs, encoding)
315323

0 commit comments

Comments (0)