Skip to content

Commit 67df487

Browse files
committed
initial support for json metadata
1 parent db9241f commit 67df487

File tree

2 files changed

+110
-64
lines changed

2 files changed

+110
-64
lines changed

zarr/ext.pyx

Lines changed: 41 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,14 @@ import sys
1313
import os
1414
import struct
1515
import ctypes
16-
import pickle
1716
import shutil
1817
import tempfile
1918
from collections import namedtuple
2019
from glob import glob
2120
import fasteners
2221

2322

24-
from zarr import util as _util
25-
from zarr import defaults
23+
from zarr import util as _util, meta as _meta, defaults as _defaults
2624

2725

2826
###############################################################################
@@ -118,21 +116,21 @@ def _normalize_cparams(cname=None, clevel=None, shuffle=None):
118116
"""
119117

120118
# determine compressor
121-
cname = cname if cname is not None else defaults.cname
119+
cname = cname if cname is not None else _defaults.cname
122120
if type(cname) != bytes:
123-
cname = cname.encode()
121+
cname = cname.encode('ascii')
124122
# check compressor is available
125123
if blosc_compname_to_compcode(cname) < 0:
126124
raise ValueError("compressor not available: %s" % cname)
127125

128126
# determine compression level
129-
clevel = clevel if clevel is not None else defaults.clevel
127+
clevel = clevel if clevel is not None else _defaults.clevel
130128
clevel = int(clevel)
131129
if clevel < 0 or clevel > 9:
132130
raise ValueError('invalid compression level: %s' % clevel)
133131

134132
# determine shuffle filter
135-
shuffle = shuffle if shuffle is not None else defaults.shuffle
133+
shuffle = shuffle if shuffle is not None else _defaults.shuffle
136134
shuffle = int(shuffle)
137135
if shuffle not in [0, 1, 2]:
138136
raise ValueError('invalid shuffle: %s' % shuffle)
@@ -678,28 +676,6 @@ def _normalize_chunks(chunks, tuple shape):
678676
return chunks
679677

680678

681-
def _read_array_metadata(path):
682-
683-
# check path exists
684-
if not os.path.exists(path):
685-
raise ValueError('path not found: %s' % path)
686-
687-
# check metadata file
688-
meta_path = os.path.join(path, defaults.metapath)
689-
if not os.path.exists(meta_path):
690-
raise ValueError('array metadata not found: %s' % path)
691-
692-
with open(meta_path, 'rb') as f:
693-
meta = pickle.load(f)
694-
return meta
695-
696-
697-
def _write_array_metadata(path, meta):
698-
meta_path = os.path.join(path, defaults.metapath)
699-
with open(meta_path, 'wb') as f:
700-
pickle.dump(meta, f, protocol=0)
701-
702-
703679
def _array_resize(BaseArray array, *args):
704680

705681
# normalize new shape argument
@@ -1147,7 +1123,7 @@ cdef class PersistentArray(BaseArray):
11471123
# a : read/write if exists, create otherwise (default)
11481124

11491125
# use metadata file as indicator of array existence
1150-
meta_path = os.path.join(path, defaults.metapath)
1126+
meta_path = os.path.join(path, _defaults.metapath)
11511127

11521128
if mode in ['r', 'r+']:
11531129
self._open(path, **kwargs)
@@ -1195,7 +1171,7 @@ cdef class PersistentArray(BaseArray):
11951171
cname=None, clevel=None, shuffle=None, fill_value=None):
11961172

11971173
# create directories
1198-
data_path = os.path.join(path, defaults.datapath)
1174+
data_path = os.path.join(path, _defaults.datapath)
11991175
if not os.path.exists(data_path):
12001176
os.makedirs(data_path)
12011177

@@ -1208,20 +1184,20 @@ cdef class PersistentArray(BaseArray):
12081184
self._fill_value = fill_value
12091185

12101186
# write metadata
1211-
metadata = {'shape': self._shape,
1212-
'chunks': self._chunks,
1213-
'dtype': self._dtype,
1214-
'cname': self._cname,
1215-
'clevel': self._clevel,
1216-
'shuffle': self._shuffle,
1217-
'fill_value': self._fill_value}
1218-
_write_array_metadata(path, metadata)
1187+
_meta.write_array_metadata(path,
1188+
shape=self._shape,
1189+
chunks=self._chunks,
1190+
dtype=self._dtype,
1191+
cname=self._cname,
1192+
clevel=self._clevel,
1193+
shuffle=self._shuffle,
1194+
fill_value=self._fill_value)
12191195

12201196
def _open(self, path, shape=None, chunks=None, dtype=None, cname=None,
12211197
clevel=None, shuffle=None, fill_value=None):
12221198

12231199
# read metadata
1224-
metadata = _read_array_metadata(path)
1200+
metadata = _meta.read_array_metadata(path)
12251201

12261202
# set attributes
12271203
self._shape = metadata['shape']
@@ -1258,8 +1234,8 @@ cdef class PersistentArray(BaseArray):
12581234
return self._cdata[cidx]
12591235

12601236
cdef object get_chunk_path(self, tuple cidx):
1261-
chunk_filename = '.'.join(map(str, cidx)) + defaults.datasuffix
1262-
chunk_path = os.path.join(self._path, defaults.datapath,
1237+
chunk_filename = '.'.join(map(str, cidx)) + _defaults.datasuffix
1238+
chunk_path = os.path.join(self._path, _defaults.datapath,
12631239
chunk_filename)
12641240
return chunk_path
12651241

@@ -1278,14 +1254,14 @@ cdef class PersistentArray(BaseArray):
12781254
_array_resize(self, *args)
12791255

12801256
# write metadata
1281-
metadata = {'shape': self._shape,
1282-
'chunks': self._chunks,
1283-
'dtype': self._dtype,
1284-
'cname': self._cname,
1285-
'clevel': self._clevel,
1286-
'shuffle': self._shuffle,
1287-
'fill_value': self._fill_value}
1288-
_write_array_metadata(self._path, metadata)
1257+
_meta.write_array_metadata(self._path,
1258+
shape=self._shape,
1259+
chunks=self._chunks,
1260+
dtype=self._dtype,
1261+
cname=self._cname,
1262+
clevel=self._clevel,
1263+
shuffle=self._shuffle,
1264+
fill_value=self._fill_value)
12891265

12901266
def __setitem__(self, key, value):
12911267
if self._mode == 'r':
@@ -1322,6 +1298,7 @@ cdef class SynchronizedPersistentArray(PersistentArray):
13221298
###############################################################################
13231299

13241300

1301+
# noinspection PyUnresolvedReferences,PyProtectedMember
13251302
cdef _lazy_get_chunk(BaseArray array, tuple cidx):
13261303
try:
13271304
chunk = array._cdata[cidx]
@@ -1464,18 +1441,18 @@ cdef class LazyPersistentArray(PersistentArray):
14641441
def __get__(self):
14651442
# N.B., chunk objects are instantiated lazily, so there may be
14661443
# data on disk but no corresponding chunk object yet
1467-
data_dir = os.path.join(self._path, defaults.datapath)
1444+
data_dir = os.path.join(self._path, _defaults.datapath)
14681445
return sum(os.path.getsize(os.path.join(data_dir, fn))
14691446
for fn in os.listdir(data_dir))
14701447

14711448
property is_initialized:
14721449
def __get__(self):
14731450
# N.B., chunk objects are instantiated lazily, so there may be
14741451
# data on disk but no corresponding chunk object yet
1475-
data_dir = os.path.join(self._path, defaults.datapath)
1452+
data_dir = os.path.join(self._path, _defaults.datapath)
14761453
a = np.zeros(self._cdata_shape, dtype='b1')
1477-
for fn in glob(os.path.join(data_dir, '*' + defaults.datasuffix)):
1478-
bn = os.path.basename(fn)[:-len(defaults.datasuffix)]
1454+
for fn in glob(os.path.join(data_dir, '*' + _defaults.datasuffix)):
1455+
bn = os.path.basename(fn)[:-len(_defaults.datasuffix)]
14791456
cidx = tuple(map(int, bn.split('.')))
14801457
a[cidx] = True
14811458
return a
@@ -1517,14 +1494,14 @@ cdef class LazyPersistentArray(PersistentArray):
15171494
_lazy_resize(self, *args)
15181495

15191496
# write metadata
1520-
metadata = {'shape': self._shape,
1521-
'chunks': self._chunks,
1522-
'dtype': self._dtype,
1523-
'cname': self._cname,
1524-
'clevel': self._clevel,
1525-
'shuffle': self._shuffle,
1526-
'fill_value': self._fill_value}
1527-
_write_array_metadata(self._path, metadata)
1497+
_meta.write_array_metadata(self._path,
1498+
shape=self._shape,
1499+
chunks=self._chunks,
1500+
dtype=self._dtype,
1501+
cname=self._cname,
1502+
clevel=self._clevel,
1503+
shuffle=self._shuffle,
1504+
fill_value=self._fill_value)
15281505

15291506

15301507
# noinspection PyAbstractClass
@@ -1539,8 +1516,8 @@ cdef class SynchronizedLazyPersistentArray(LazyPersistentArray):
15391516
return _lazy_get_chunk(self, cidx)
15401517

15411518
cdef BaseChunk create_chunk(self, tuple cidx):
1542-
chunk_filename = '.'.join(map(str, cidx)) + defaults.datasuffix
1543-
chunk_path = os.path.join(self._path, defaults.datapath,
1519+
chunk_filename = '.'.join(map(str, cidx)) + _defaults.datasuffix
1520+
chunk_path = os.path.join(self._path, _defaults.datapath,
15441521
chunk_filename)
15451522
return SynchronizedPersistentChunk(
15461523
path=chunk_path, shape=self._chunks, dtype=self._dtype,

zarr/meta.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# -*- coding: utf-8 -*-
2+
from __future__ import absolute_import, print_function, division
3+
4+
5+
import os
6+
import json
7+
import ast
8+
import numpy as np
9+
10+
11+
from zarr import defaults as _defaults
12+
13+
14+
def read_array_metadata(path):
    """Load the array configuration stored as JSON under *path*.

    Returns a dict with keys 'shape', 'chunks', 'dtype', 'cname',
    'clevel', 'shuffle' and 'fill_value', with values restored to the
    representations the array classes use internally.

    Raises ValueError if *path* does not exist or contains no
    metadata file.
    """

    # locate the metadata file (os.path.join does not touch the disk)
    meta_path = os.path.join(path, _defaults.metapath)

    # fail early with distinct messages for a missing array directory
    # versus a directory that lacks metadata
    if not os.path.exists(path):
        raise ValueError('path not found: %s' % path)
    if not os.path.exists(meta_path):
        raise ValueError('array metadata not found: %s' % path)

    # parse the JSON document
    with open(meta_path) as f:
        meta = json.load(f)

    # JSON round-trips tuples as lists, bytes as text, and dtypes as
    # strings; restore each to its internal form
    meta['shape'] = tuple(meta['shape'])
    meta['chunks'] = tuple(meta['chunks'])
    meta['cname'] = meta['cname'].encode('ascii')
    meta['dtype'] = decode_dtype(meta['dtype'])

    return meta
36+
37+
38+
def write_array_metadata(path, shape, chunks, dtype, cname, clevel, shuffle,
                         fill_value):
    """Write the array configuration as JSON metadata under *path*.

    Parameters mirror the array attributes: *shape* and *chunks* are
    tuples, *dtype* a numpy dtype (encoded via encode_dtype so
    read_array_metadata can reverse it), *cname* the compressor name,
    *clevel*/*shuffle* ints and *fill_value* the default element value.
    """

    # cname may arrive as bytes (the internal representation) or as
    # text; JSON requires text, so decode only when necessary.
    # NOTE: the previous str(cname, 'ascii') raised TypeError when
    # cname was already a str, and was invalid on Python 2.
    if isinstance(cname, bytes):
        cname = cname.decode('ascii')

    # construct metadata dictionary
    meta = dict(
        shape=shape,
        chunks=chunks,
        dtype=encode_dtype(dtype),
        cname=cname,
        clevel=clevel,
        shuffle=shuffle,
        fill_value=fill_value,
    )

    # write to file; sorted keys and indentation keep the file stable
    # and human-readable
    meta_path = os.path.join(path, _defaults.metapath)
    with open(meta_path, 'w') as f:
        json.dump(meta, f, indent=4, sort_keys=True)
56+
57+
58+
def encode_dtype(d):
    """Return a JSON-serializable string describing the numpy dtype *d*.

    Plain dtypes are encoded via their array-protocol type string
    (e.g. '<i4'); structured dtypes (those with fields) fall back to
    str(), which decode_dtype() can parse back.
    """
    return d.str if d.fields is None else str(d)
63+
64+
65+
def decode_dtype(s):
    """Reconstruct a numpy dtype from a string produced by encode_dtype().

    Plain type strings (e.g. '<i4') are passed straight to np.dtype.
    Structured dtypes were encoded via str() and look like Python
    literals (e.g. "[('a', '<i4')]"), which np.dtype does not accept as
    a string; those are parsed with ast.literal_eval first.
    """
    try:
        return np.dtype(s)
    except (ValueError, TypeError):
        # np.dtype raises TypeError (not ValueError) for unrecognized
        # strings in most numpy versions, so catching ValueError alone
        # let structured-dtype strings escape undecoded
        return np.dtype(ast.literal_eval(s))

0 commit comments

Comments
 (0)