Skip to content

Commit 9c0c621

Browse files
committed
make consolidated metadata human-readable
1 parent c8ed0f6 commit 9c0c621

File tree

6 files changed

+63
-19
lines changed

6 files changed

+63
-19
lines changed

zarr/attrs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from collections import MutableMapping
55

66

7-
from zarr.compat import text_type
87
from zarr.errors import PermissionError
8+
from zarr.meta import parse_metadata
99

1010

1111
class Attributes(MutableMapping):
@@ -43,7 +43,7 @@ def _get_nosync(self):
4343
except KeyError:
4444
d = dict()
4545
else:
46-
d = json.loads(text_type(data, 'ascii'))
46+
d = parse_metadata(data)
4747
return d
4848

4949
def asdict(self):

zarr/compat.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ class PermissionError(Exception):
1919
def OrderedDict_move_to_end(od, key):
2020
od[key] = od.pop(key)
2121

22+
from collections import Mapping
23+
2224

2325
else: # pragma: py2 no cover
2426

@@ -29,3 +31,5 @@ def OrderedDict_move_to_end(od, key):
2931

3032
def OrderedDict_move_to_end(od, key):
3133
od.move_to_end(key)
34+
35+
from collections.abc import Mapping

zarr/convenience.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from zarr.errors import err_path_not_found, CopyError
1616
from zarr.util import normalize_storage_path, TreeViewer, buffer_size
1717
from zarr.compat import PY2, text_type
18+
from zarr.meta import ensure_str, json_dumps
1819

1920

2021
# noinspection PyShadowingBuiltins
@@ -1119,8 +1120,15 @@ def is_zarr_key(key):
11191120
return (key.endswith('.zarray') or key.endswith('.zgroup') or
11201121
key.endswith('.zattrs'))
11211122

1122-
out = {key: store[key].decode() for key in store if is_zarr_key(key)}
1123-
store[metadata_key] = json.dumps(out).encode()
1123+
# out = {key: store[key].decode() for key in store if is_zarr_key(key)}
1124+
out = {
1125+
'zarr_consolidated_format': 1,
1126+
'metadata': {
1127+
key: json.loads(ensure_str(store[key]))
1128+
for key in store if is_zarr_key(key)
1129+
}
1130+
}
1131+
store[metadata_key] = json_dumps(out).encode()
11241132
return open_consolidated(store, metadata_key=metadata_key)
11251133

11261134

zarr/core.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,9 @@ def _load_metadata_nosync(self):
165165
if config is None:
166166
self._compressor = None
167167
else:
168+
# temporary workaround for
169+
# https://github.com/zarr-developers/numcodecs/issues/78
170+
config = dict(config)
168171
self._compressor = get_codec(config)
169172

170173
# setup filters

zarr/meta.py

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
import numpy as np
88

99

10-
from zarr.compat import PY2, binary_type
10+
from zarr.compat import PY2, binary_type, Mapping
1111
from zarr.errors import MetadataError
1212

1313

1414
ZARR_FORMAT = 2
1515

1616

17-
def _ensure_str(s):
17+
def ensure_str(s):
1818
if PY2: # pragma: py3 no cover
1919
# noinspection PyUnresolvedReferences
2020
if isinstance(s, buffer): # noqa
@@ -27,12 +27,32 @@ def _ensure_str(s):
2727
return s
2828

2929

30+
def json_dumps(o):
    """Write JSON in a consistent, human-readable way.

    The object is serialized with a four-space indent, sorted object keys
    and ASCII-only output (non-ASCII characters are escaped).

    Parameters
    ----------
    o : object
        Any value accepted by ``json.dumps``.

    Returns
    -------
    str
        The JSON document as text; callers encode it to bytes themselves.
    """
    # Separators are given explicitly because on Python 2 the default item
    # separator is ', ', which leaves trailing whitespace on every line once
    # ``indent`` is set.
    return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True,
                      separators=(',', ': '))
34+
35+
36+
def parse_metadata(s):
    """Return metadata as a mapping, parsing it from JSON when necessary.

    Parameters
    ----------
    s : Mapping, or a value accepted by ``ensure_str``
        Either metadata that is already a mapping object, or the serialized
        JSON form of the metadata.

    Returns
    -------
    meta
        ``s`` itself (the same object, not a copy) when it is a mapping;
        otherwise the result of ``json.loads`` on the text form of ``s``.
    """
    if isinstance(s, Mapping):
        # assume metadata has already been parsed into a mapping object
        return s

    # assume metadata needs to be parsed as JSON
    return json.loads(ensure_str(s))
45+
46+
3047
def decode_array_metadata(s):
31-
s = _ensure_str(s)
32-
meta = json.loads(s)
48+
meta = parse_metadata(s)
49+
50+
# check metadata format
3351
zarr_format = meta.get('zarr_format', None)
3452
if zarr_format != ZARR_FORMAT:
3553
raise MetadataError('unsupported zarr format: %s' % zarr_format)
54+
55+
# extract array metadata fields
3656
try:
3757
dtype = decode_dtype(meta['dtype'])
3858
fill_value = decode_fill_value(meta['fill_value'], dtype)
@@ -67,8 +87,7 @@ def encode_array_metadata(meta):
6787
order=meta['order'],
6888
filters=meta['filters'],
6989
)
70-
s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True,
71-
separators=(',', ': '))
90+
s = json_dumps(meta)
7291
b = s.encode('ascii')
7392
return b
7493

@@ -98,14 +117,14 @@ def decode_dtype(d):
98117

99118

100119
def decode_group_metadata(s):
101-
s = _ensure_str(s)
102-
meta = json.loads(s)
120+
meta = parse_metadata(s)
121+
122+
# check metadata format version
103123
zarr_format = meta.get('zarr_format', None)
104124
if zarr_format != ZARR_FORMAT:
105125
raise MetadataError('unsupported zarr format: %s' % zarr_format)
106-
meta = dict(
107-
zarr_format=ZARR_FORMAT,
108-
)
126+
127+
meta = dict(zarr_format=zarr_format)
109128
return meta
110129

111130

@@ -115,7 +134,7 @@ def encode_group_metadata(meta=None):
115134
meta = dict(
116135
zarr_format=ZARR_FORMAT,
117136
)
118-
s = json.dumps(meta, indent=4, sort_keys=True, ensure_ascii=True)
137+
s = json_dumps(meta)
119138
b = s.encode('ascii')
120139
return b
121140

zarr/storage.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from zarr.compat import PY2, binary_type, OrderedDict_move_to_end
4242
from numcodecs.registry import codec_registry
4343
from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
44-
err_fspath_exists_notdir, err_read_only)
44+
err_fspath_exists_notdir, err_read_only, MetadataError)
4545

4646

4747
array_meta_key = '.zarray'
@@ -1932,12 +1932,22 @@ class ConsolidatedMetadataStore(MutableMapping):
19321932
"""
19331933
def __init__(self, store, metadata_key='.zmetadata'):
19341934
self.store = store
1935+
1936+
# retrieve consolidated metadata
19351937
if sys.version_info.major == 3 and sys.version_info.minor < 6:
19361938
d = store[metadata_key].decode() # pragma: no cover
19371939
else: # pragma: no cover
19381940
d = store[metadata_key]
1939-
metadata = json.loads(d)
1940-
self.meta_store = {k: v.encode() for k, v in metadata.items()}
1941+
meta = json.loads(d)
1942+
1943+
# check format of consolidated metadata
1944+
consolidated_format = meta.get('zarr_consolidated_format', None)
1945+
if consolidated_format != 1:
1946+
raise MetadataError('unsupported zarr consolidated metadata format: %s' %
1947+
consolidated_format)
1948+
1949+
# decode metadata
1950+
self.meta_store = meta['metadata']
19411951

19421952
def __getitem__(self, key):
19431953
return self.meta_store[key]

0 commit comments

Comments
 (0)