Skip to content

Commit 7261bf4

Browse files
committed
all tests passing again
1 parent 6d0bb01 commit 7261bf4

12 files changed

+332
-230
lines changed

README.rst

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
zarr
22
====
33

4-
A minimal implementation of chunked, compressed, N-dimensional arrays for
4+
A minimal implementation of chunked, compressed, N-dimensional arrays for
55
Python.
66

77
* Source code: https://github.com/alimanfoo/zarr
@@ -44,15 +44,17 @@ Create an array::
4444
>>> import zarr
4545
>>> z = zarr.empty((10000, 1000), dtype='i4', chunks=(1000, 100))
4646
>>> z
47-
zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
48-
nbytes: 38.1M; cbytes: 0
47+
zarr.ext.SynchronizedArray((10000, 1000), int32, chunks=(1000, 100))
48+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
49+
nbytes: 38.1M; cbytes: 0; initialized: 0/100
4950

5051
Fill it with some data::
5152

5253
>>> z[:] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
5354
>>> z
54-
zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
55-
nbytes: 38.1M; cbytes: 2.0M; ratio: 19.3
55+
zarr.ext.SynchronizedArray((10000, 1000), int32, chunks=(1000, 100))
56+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
57+
nbytes: 38.1M; cbytes: 2.0M; ratio: 19.3; initialized: 100/100
5658

5759
Obtain a NumPy array by slicing::
5860

@@ -85,47 +87,52 @@ Resize the array and add more data::
8587

8688
>>> z.resize(20000, 1000)
8789
>>> z
88-
zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
89-
nbytes: 76.3M; cbytes: 2.0M; ratio: 38.5
90+
zarr.ext.SynchronizedArray((20000, 1000), int32, chunks=(1000, 100))
91+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
92+
nbytes: 76.3M; cbytes: 2.0M; ratio: 38.5; initialized: 100/200
9093
>>> z[10000:, :] = np.arange(10000000, dtype='i4').reshape(10000, 1000)
9194
>>> z
92-
zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
93-
nbytes: 76.3M; cbytes: 4.0M; ratio: 19.3
95+
zarr.ext.SynchronizedArray((20000, 1000), int32, chunks=(1000, 100))
96+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
97+
nbytes: 76.3M; cbytes: 4.0M; ratio: 19.3; initialized: 200/200
9498

9599
For convenience, an ``append()`` method is also available, which can be used to
96100
append data to any axis::
97101

98102
>>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000)
99103
>>> z = zarr.array(a, chunks=(1000, 100))
100104
>>> z
101-
zarr.ext.Array((10000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
102-
nbytes: 38.1M; cbytes: 2.0M; ratio: 19.3
105+
zarr.ext.SynchronizedArray((10000, 1000), int32, chunks=(1000, 100))
106+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
107+
nbytes: 38.1M; cbytes: 2.0M; ratio: 19.3; initialized: 100/100
103108
>>> z.append(a+a)
104109
>>> z
105-
zarr.ext.Array((20000, 1000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
106-
nbytes: 76.3M; cbytes: 3.6M; ratio: 21.2
110+
zarr.ext.SynchronizedArray((20000, 1000), int32, chunks=(1000, 100))
111+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
112+
nbytes: 76.3M; cbytes: 3.6M; ratio: 21.2; initialized: 200/200
107113
>>> z.append(np.vstack([a, a]), axis=1)
108114
>>> z
109-
zarr.ext.Array((20000, 2000), int32, chunks=(1000, 100), cname='blosclz', clevel=5, shuffle=1)
110-
nbytes: 152.6M; cbytes: 7.6M; ratio: 20.2
115+
zarr.ext.SynchronizedArray((20000, 2000), int32, chunks=(1000, 100))
116+
cname: 'blosclz'; clevel: 5; shuffle: 1 (BYTESHUFFLE)
117+
nbytes: 152.6M; cbytes: 7.6M; ratio: 20.2; initialized: 400/400
111118

112119
Tuning
113120
------
114121

115-
``zarr`` is designed for use in parallel computations working chunk-wise
122+
``zarr`` is designed for use in parallel computations working chunk-wise
116123
over data. Try it with `dask.array
117124
<http://dask.pydata.org/en/latest/array.html>`_.
118125

119-
``zarr`` is optimised for accessing and storing data in contiguous slices,
120-
of the same size or larger than chunks. It is not and will never be
121-
optimised for single item access.
126+
``zarr`` is optimised for accessing and storing data in contiguous slices,
127+
of the same size or larger than chunks. It is not and will never be
128+
optimised for single item access.
122129

123-
Chunks sizes >= 1M are generally good. Optimal chunk shape will depend on
130+
Chunk sizes >= 1M are generally good. Optimal chunk shape will depend on
124131
the correlation structure in your data.
125132

126133
Acknowledgments
127134
---------------
128135

129136
``zarr`` uses `c-blosc <https://github.com/Blosc/c-blosc>`_ internally for
130-
compression and decompression and borrows code heavily from
137+
compression and decompression and borrows code heavily from
131138
`bcolz <http://bcolz.blosc.org/>`_.

zarr/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
from __future__ import absolute_import, print_function, division
33

44

5-
from zarr.ext import Chunk, Array, SynchronizedChunk, blosc_version, \
6-
PersistentChunk, PersistentArray
75
from zarr.core import empty, zeros, ones, full, array
86
from zarr import defaults
7+
from zarr import constants
98
from zarr.version import version as __version__

zarr/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
__author__ = 'aliman'
2+
3+
4+
NOSHUFFLE = 0
5+
BYTESHUFFLE = 1
6+
BITSHUFFLE = 2

zarr/core.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,20 @@ def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
3030
bit shuffle.
3131
synchronized : bool, optional
3232
If True, each chunk will be protected with a lock to prevent data
33-
collision during write operations.
33+
collision during concurrent write operations.
3434
3535
Returns
3636
-------
3737
z : zarr.ext.Array
3838
3939
"""
4040

41-
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
42-
clevel=clevel, shuffle=shuffle,
43-
synchronized=synchronized)
41+
if synchronized:
42+
cls = _ext.SynchronizedArray
43+
else:
44+
cls = _ext.Array
45+
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
46+
clevel=clevel, shuffle=shuffle)
4447

4548

4649
def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
@@ -66,17 +69,20 @@ def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
6669
bit shuffle.
6770
synchronized : bool, optional
6871
If True, each chunk will be protected with a lock to prevent data
69-
collision during write operations.
72+
collision during concurrent write operations.
7073
7174
Returns
7275
-------
7376
z : zarr.ext.Array
7477
7578
"""
7679

77-
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
78-
clevel=clevel, shuffle=shuffle, fill_value=0,
79-
synchronized=synchronized)
80+
if synchronized:
81+
cls = _ext.SynchronizedArray
82+
else:
83+
cls = _ext.Array
84+
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
85+
clevel=clevel, shuffle=shuffle, fill_value=0)
8086

8187

8288
def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
@@ -110,10 +116,12 @@ def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
110116
111117
"""
112118

113-
114-
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
115-
clevel=clevel, shuffle=shuffle, fill_value=1,
116-
synchronized=synchronized)
119+
if synchronized:
120+
cls = _ext.SynchronizedArray
121+
else:
122+
cls = _ext.Array
123+
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
124+
clevel=clevel, shuffle=shuffle, fill_value=1)
117125

118126

119127
def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
@@ -149,9 +157,12 @@ def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
149157
150158
"""
151159

152-
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
153-
clevel=clevel, shuffle=shuffle, fill_value=fill_value,
154-
synchronized=synchronized)
160+
if synchronized:
161+
cls = _ext.SynchronizedArray
162+
else:
163+
cls = _ext.Array
164+
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
165+
clevel=clevel, shuffle=shuffle, fill_value=fill_value)
155166

156167

157168
def array(data, chunks=None, dtype=None, cname=None, clevel=None,
@@ -209,9 +220,12 @@ def array(data, chunks=None, dtype=None, cname=None, clevel=None,
209220
raise ValueError('chunks must be specified')
210221

211222
# create array
212-
z = _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
213-
clevel=clevel, shuffle=shuffle,
214-
synchronized=synchronized, fill_value=fill_value)
223+
if synchronized:
224+
cls = _ext.SynchronizedArray
225+
else:
226+
cls = _ext.Array
227+
z = cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
228+
clevel=clevel, shuffle=shuffle, fill_value=fill_value)
215229

216230
# fill with data
217231
z[:] = data

zarr/defaults.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
# -*- coding: utf-8 -*-
22
from __future__ import absolute_import, print_function, division
33

4-
4+
# compression defaults
55
cname = b'blosclz'
66
clevel = 5
7-
shuffle = 1
7+
shuffle = 1 # byte shuffle
8+
9+
# for persistence
10+
metapath = '__zmeta__'
11+
datapath = '__zdata__'
12+
datasuffix = '.blosc'

zarr/ext.pxd

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ cdef class PersistentChunk(BaseChunk):
3131
cdef object _path
3232
cdef object _basename
3333
cdef object _dirname
34-
cdef tuple read_header(self)
34+
cdef object read_header(self)
3535
cdef bytes read(self)
3636
cdef void write(self, bytes data)
3737

@@ -43,6 +43,7 @@ cdef class SynchronizedPersistentChunk(PersistentChunk):
4343

4444
cdef class BaseArray:
4545
cdef tuple _shape
46+
cdef tuple _cdata_shape
4647
cdef tuple _chunks
4748
cdef dtype _dtype
4849
cdef size_t _size
@@ -54,7 +55,7 @@ cdef class BaseArray:
5455
cdef object _fill_value
5556
# abstract methods
5657
cdef BaseChunk create_chunk(self, tuple cidx)
57-
cdef BaseChunk get_chunk(self, tuple cidx)
58+
cpdef BaseChunk get_chunk(self, tuple cidx)
5859

5960

6061
cdef class Array(BaseArray):
@@ -66,6 +67,7 @@ cdef class SynchronizedArray(Array):
6667

6768

6869
cdef class PersistentArray(BaseArray):
70+
cdef ndarray _cdata
6971
cdef object _mode
7072
cdef object _path
7173

0 commit comments

Comments
 (0)