-
Notifications
You must be signed in to change notification settings - Fork 95
zarr-python
v3 compatibility
#516
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
39722e7
d3c7e37
25d7d14
ffe5f9d
5aef233
b9323d2
0f17119
5c8806b
80fedcd
1f69a0b
d556e52
b27e64c
41d6e8e
7ade1a6
492ddee
6e5741c
c0316ac
59bd36c
187ced2
690ed21
5019b15
d96cf46
cbcb720
b88655f
73eaf33
3757199
9444ff8
d8848ce
1fa294e
543178d
96b56cd
0808b05
aef006e
d9bf0dd
884fc68
1145f45
94ec479
a9693d1
7e9112a
a7af691
f7b87de
95f340f
fa364a7
53922a2
0486ac1
c522a52
9b96d8c
17478bd
2d5033c
9066360
4750f8e
73385f5
1a79a5c
f51604f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
import base64 | ||
import io | ||
import logging | ||
from typing import Union, BinaryIO | ||
from typing import Union, BinaryIO, Any, cast | ||
from packaging.version import Version | ||
|
||
import fsspec.core | ||
from fsspec.implementations.reference import LazyReferenceMapper | ||
|
@@ -21,11 +22,11 @@ | |
"for more details." | ||
) | ||
|
||
try: | ||
from zarr.meta import encode_fill_value | ||
except ModuleNotFoundError: | ||
# https://github.com/zarr-developers/zarr-python/issues/2021 | ||
from zarr.v2.meta import encode_fill_value | ||
# try: | ||
# from zarr.meta import encode_fill_value | ||
# except ModuleNotFoundError: | ||
# # https://github.com/zarr-developers/zarr-python/issues/2021 | ||
# from zarr.v2.meta import encode_fill_value | ||
|
||
lggr = logging.getLogger("h5-to-zarr") | ||
_HIDDEN_ATTRS = { # from h5netcdf.attrs | ||
|
@@ -111,9 +112,14 @@ def __init__( | |
if vlen_encode not in ["embed", "null", "leave", "encode"]: | ||
raise NotImplementedError | ||
self.vlen = vlen_encode | ||
self.store = out or {} | ||
self._zroot = zarr.group(store=self.store, overwrite=True) | ||
|
||
self.store_dict = out or {} | ||
if Version(zarr.__version__) < Version("3.0.0.a0"): | ||
self.store = zarr.storage.KVStore(self.store_dict) | ||
self._zroot = zarr.group(store=self.store, overwrite=True) | ||
else: | ||
self.store = zarr.storage.MemoryStore(mode="a", store_dict=self.store_dict) | ||
self._zroot = zarr.group(store=self.store, zarr_format=2, overwrite=True) | ||
|
||
self._uri = url | ||
self.error = error | ||
lggr.debug(f"HDF5 file URI: {self._uri}") | ||
|
@@ -140,7 +146,6 @@ def translate(self, preserve_linked_dsets=False): | |
""" | ||
lggr.debug("Translation begins") | ||
self._transfer_attrs(self._h5f, self._zroot) | ||
|
||
self._h5f.visititems(self._translator) | ||
|
||
if preserve_linked_dsets: | ||
|
@@ -157,7 +162,17 @@ def translate(self, preserve_linked_dsets=False): | |
self.store.flush() | ||
return self.store | ||
else: | ||
store = _encode_for_JSON(self.store) | ||
keys_to_remove = [] | ||
new_keys = {} | ||
for k, v in self.store_dict.items(): | ||
if isinstance(v, zarr.core.buffer.cpu.Buffer): | ||
key = str.removeprefix(k, "/") | ||
new_keys[key] = v.to_bytes() | ||
keys_to_remove.append(k) | ||
for k in keys_to_remove: | ||
del self.store_dict[k] | ||
|
||
self.store_dict.update(new_keys) | ||
store = _encode_for_JSON(self.store_dict) | ||
return {"version": 1, "refs": store} | ||
|
||
def _unref(self, ref): | ||
|
@@ -465,26 +480,31 @@ def _translator( | |
if h5py.h5ds.is_scale(h5obj.id) and not cinfo: | ||
return | ||
if h5obj.attrs.get("_FillValue") is not None: | ||
fill = h5obj.attrs.get("_FillValue") | ||
fill = encode_fill_value( | ||
h5obj.attrs.get("_FillValue"), dt or h5obj.dtype | ||
) | ||
|
||
# Create a Zarr array equivalent to this HDF5 dataset... | ||
za = self._zroot.require_dataset( | ||
h5obj.name, | ||
adims = self._get_array_dims(h5obj) | ||
|
||
# Create a Zarr array equivalent to this HDF5 dataset... | ||
za = self._zroot.require_array( | ||
name=h5obj.name, | ||
shape=h5obj.shape, | ||
dtype=dt or h5obj.dtype, | ||
chunks=h5obj.chunks or False, | ||
fill_value=fill, | ||
compression=None, | ||
compressor=None, | ||
|
||
filters=filters, | ||
overwrite=True, | ||
attributes={ | ||
"_ARRAY_DIMENSIONS": adims, | ||
}, | ||
**kwargs, | ||
) | ||
lggr.debug(f"Created Zarr array: {za}") | ||
self._transfer_attrs(h5obj, za) | ||
adims = self._get_array_dims(h5obj) | ||
za.attrs["_ARRAY_DIMENSIONS"] = adims | ||
|
||
# za.attrs["_ARRAY_DIMENSIONS"] = adims | ||
lggr.debug(f"_ARRAY_DIMENSIONS = {adims}") | ||
|
||
if "data" in kwargs: | ||
|
@@ -496,6 +516,8 @@ def _translator( | |
if h5obj.fletcher32: | ||
logging.info("Discarding fletcher32 checksum") | ||
v["size"] -= 4 | ||
key = str.removeprefix(h5obj.name, "/") + "/" + ".".join(map(str, k)) | ||
|
||
|
||
if ( | ||
self.inline | ||
and isinstance(v, dict) | ||
|
@@ -508,9 +530,10 @@ def _translator( | |
data.decode("ascii") | ||
except UnicodeDecodeError: | ||
data = b"base64:" + base64.b64encode(data) | ||
self.store[za._chunk_key(k)] = data | ||
|
||
self.store_dict[key] = data | ||
else: | ||
self.store[za._chunk_key(k)] = [ | ||
self.store_dict[key] = [ | ||
self._uri, | ||
v["offset"], | ||
v["size"], | ||
|
@@ -681,3 +704,44 @@ def _is_netcdf_variable(dataset: h5py.Dataset): | |
|
||
def has_visititems_links(): | ||
return hasattr(h5py.Group, "visititems_links") | ||
|
||
|
||
def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any:
    """Encode a fill value into a plain, JSON-friendly Python object.

    The encoding is chosen from the kind of ``dtype``:

    * floats: ``"NaN"``, ``"Infinity"`` or ``"-Infinity"`` for the
      non-finite values, otherwise a Python ``float``;
    * signed/unsigned integers: a Python ``int``;
    * booleans: a Python ``bool``;
    * complex: a ``(real, imag)`` tuple, each part encoded as a float;
    * bytes and void/structured values: a base64-encoded ASCII string;
    * unicode strings: returned unchanged;
    * datetime/timedelta: the underlying 64-bit integer;
    * anything else: returned unchanged.

    Parameters
    ----------
    v
        The fill value to encode. ``None`` is passed straight through.
    dtype
        Data type of the array the fill value belongs to. Anything that
        ``numpy.dtype`` accepts (an ``np.dtype``, a string such as
        ``"f8"``, or a scalar type) is allowed.
    object_codec
        Codec whose ``encode`` method is used to serialize ``v`` when
        ``dtype`` is a structured type containing Python objects.

    Returns
    -------
    The encoded fill value.

    Raises
    ------
    ValueError
        If ``dtype`` is a structured type holding objects and no
        ``object_codec`` was supplied.
    """
    # early out
    if v is None:
        return v

    # Callers may hand over a dtype-like (e.g. the string "O") instead of an
    # np.dtype instance; normalize so that ``.kind`` is always available.
    dtype = np.dtype(dtype)
    kind = dtype.kind

    if kind == "V" and dtype.hasobject:
        if object_codec is None:
            raise ValueError("missing object_codec for object array")
        v = object_codec.encode(v)
        return str(base64.standard_b64encode(v), "ascii")

    if kind == "f":
        if np.isnan(v):
            return "NaN"
        if np.isposinf(v):
            return "Infinity"
        if np.isneginf(v):
            return "-Infinity"
        return float(v)
    if kind in "ui":
        return int(v)
    if kind == "b":
        return bool(v)
    if kind == "c":
        # Real and imaginary parts are always encoded as float64 values,
        # independent of the complex precision of ``dtype``.
        part_dtype = np.dtype(np.float64)
        return (
            encode_fill_value(v.real, part_dtype, object_codec),
            encode_fill_value(v.imag, part_dtype, object_codec),
        )
    if kind in "SV":
        return str(base64.standard_b64encode(v), "ascii")
    if kind == "U":
        return v
    if kind in "mM":
        # Reinterpret the datetime/timedelta as its raw int64 tick count.
        return int(v.view("i8"))
    return v
Uh oh!
There was an error while loading. Please reload this page.