Skip to content

Commit a820700

Browse files
committed
lazy arrays
1 parent 2f18f18 commit a820700

File tree

4 files changed

+205
-36
lines changed

4 files changed

+205
-36
lines changed

zarr/core.py

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
import numpy as np
66

77

8-
import zarr.ext as _ext
8+
from zarr import ext as _ext
99

1010

1111
def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
12-
synchronized=True):
12+
synchronized=True, lazy=False):
1313
"""Create an empty array.
1414
1515
Parameters
@@ -31,6 +31,10 @@ def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
3131
synchronized : bool, optional
3232
If True, each chunk will be protected with a lock to prevent data
3333
collision during concurrent write operations.
34+
lazy : bool, optional
35+
If True, an alternative array class is used which instantiates chunk
36+
objects only on demand. This may reduce overhead when working with
37+
small regions of very large arrays with a large number of chunks.
3438
3539
Returns
3640
-------
@@ -39,15 +43,21 @@ def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
3943
"""
4044

4145
if synchronized:
42-
cls = _ext.SynchronizedArray
46+
if lazy:
47+
cls = _ext.SynchronizedLazyArray
48+
else:
49+
cls = _ext.SynchronizedArray
4350
else:
44-
cls = _ext.Array
51+
if lazy:
52+
cls = _ext.LazyArray
53+
else:
54+
cls = _ext.Array
4555
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
4656
clevel=clevel, shuffle=shuffle)
4757

4858

4959
def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
50-
synchronized=True):
60+
synchronized=True, lazy=False):
5161
"""Create an array, with zero being used as the default value for
5262
uninitialised portions of the array.
5363
@@ -70,6 +80,10 @@ def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
7080
synchronized : bool, optional
7181
If True, each chunk will be protected with a lock to prevent data
7282
collision during concurrent write operations.
83+
lazy : bool, optional
84+
If True, an alternative array class is used which instantiates chunk
85+
objects only on demand. This may reduce overhead when working with
86+
small regions of very large arrays with a large number of chunks.
7387
7488
Returns
7589
-------
@@ -78,15 +92,21 @@ def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
7892
"""
7993

8094
if synchronized:
81-
cls = _ext.SynchronizedArray
95+
if lazy:
96+
cls = _ext.SynchronizedLazyArray
97+
else:
98+
cls = _ext.SynchronizedArray
8299
else:
83-
cls = _ext.Array
100+
if lazy:
101+
cls = _ext.LazyArray
102+
else:
103+
cls = _ext.Array
84104
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
85105
clevel=clevel, shuffle=shuffle, fill_value=0)
86106

87107

88108
def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
89-
synchronized=True):
109+
synchronized=True, lazy=False):
90110
"""Create an array, with one being used as the default value for
91111
uninitialised portions of the array.
92112
@@ -109,6 +129,10 @@ def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
109129
synchronized : bool, optional
110130
If True, each chunk will be protected with a lock to prevent data
111131
collision during concurrent write operations.
132+
lazy : bool, optional
133+
If True, an alternative array class is used which instantiates chunk
134+
objects only on demand. This may reduce overhead when working with
135+
small regions of very large arrays with a large number of chunks.
112136
113137
Returns
114138
-------
@@ -117,15 +141,21 @@ def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
117141
"""
118142

119143
if synchronized:
120-
cls = _ext.SynchronizedArray
144+
if lazy:
145+
cls = _ext.SynchronizedLazyArray
146+
else:
147+
cls = _ext.SynchronizedArray
121148
else:
122-
cls = _ext.Array
149+
if lazy:
150+
cls = _ext.LazyArray
151+
else:
152+
cls = _ext.Array
123153
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
124154
clevel=clevel, shuffle=shuffle, fill_value=1)
125155

126156

127157
def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
128-
shuffle=None, synchronized=True):
158+
shuffle=None, synchronized=True, lazy=False):
129159
"""Create an array, with `fill_value` being used as the default value for
130160
uninitialised portions of the array.
131161
@@ -150,6 +180,10 @@ def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
150180
synchronized : bool, optional
151181
If True, each chunk will be protected with a lock to prevent data
152182
collision during concurrent write operations.
183+
lazy : bool, optional
184+
If True, an alternative array class is used which instantiates chunk
185+
objects only on demand. This may reduce overhead when working with
186+
small regions of very large arrays with a large number of chunks.
153187
154188
Returns
155189
-------
@@ -158,15 +192,21 @@ def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
158192
"""
159193

160194
if synchronized:
161-
cls = _ext.SynchronizedArray
195+
if lazy:
196+
cls = _ext.SynchronizedLazyArray
197+
else:
198+
cls = _ext.SynchronizedArray
162199
else:
163-
cls = _ext.Array
200+
if lazy:
201+
cls = _ext.LazyArray
202+
else:
203+
cls = _ext.Array
164204
return cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
165205
clevel=clevel, shuffle=shuffle, fill_value=fill_value)
166206

167207

168208
def array(data, chunks=None, dtype=None, cname=None, clevel=None,
169-
shuffle=None, fill_value=None, synchronized=True):
209+
shuffle=None, fill_value=None, synchronized=True, lazy=False):
170210
"""Create an array filled with `data`.
171211
172212
Parameters
@@ -190,13 +230,29 @@ def array(data, chunks=None, dtype=None, cname=None, clevel=None,
190230
synchronized : bool, optional
191231
If True, each chunk will be protected with a lock to prevent data
192232
collision during concurrent write operations.
233+
lazy : bool, optional
234+
If True, an alternative array class is used which instantiates chunk
235+
objects only on demand. This may reduce overhead when working with
236+
small regions of very large arrays with a large number of chunks.
193237
194238
Returns
195239
-------
196240
z : zarr Array
197241
198242
"""
199243

244+
# determine array class to use
245+
if synchronized:
246+
if lazy:
247+
cls = _ext.SynchronizedLazyArray
248+
else:
249+
cls = _ext.SynchronizedArray
250+
else:
251+
if lazy:
252+
cls = _ext.LazyArray
253+
else:
254+
cls = _ext.Array
255+
200256
# ensure data is array-like
201257
if not hasattr(data, 'shape') or not hasattr(data, 'dtype'):
202258
data = np.asanyarray(data)
@@ -219,11 +275,7 @@ def array(data, chunks=None, dtype=None, cname=None, clevel=None,
219275
else:
220276
raise ValueError('chunks must be specified')
221277

222-
# create array
223-
if synchronized:
224-
cls = _ext.SynchronizedArray
225-
else:
226-
cls = _ext.Array
278+
# instantiate array
227279
z = cls(shape=shape, chunks=chunks, dtype=dtype, cname=cname,
228280
clevel=clevel, shuffle=shuffle, fill_value=fill_value)
229281

@@ -273,6 +325,8 @@ def open(path, mode='a', shape=None, chunks=None, dtype=None, cname=None,
273325
274326
"""
275327

328+
# TODO: add support for a `lazy` option when opening persistent arrays
329+
276330
if synchronized:
277331
cls = _ext.SynchronizedPersistentArray
278332
else:

zarr/ext.pxd

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ cdef class BaseArray:
5555
cdef object _fill_value
5656
# abstract methods
5757
cdef BaseChunk create_chunk(self, tuple cidx)
58-
cpdef BaseChunk get_chunk(self, tuple cidx)
58+
cdef BaseChunk get_chunk(self, tuple cidx)
5959

6060

6161
cdef class Array(BaseArray):
@@ -77,16 +77,14 @@ cdef class SynchronizedPersistentArray(PersistentArray):
7777

7878

7979
cdef class LazyArray(BaseArray):
80-
# TODO
81-
pass
80+
cdef dict _cdata
8281

8382

84-
cdef class LazyPersistentArray(BaseArray):
85-
# TODO
83+
cdef class SynchronizedLazyArray(LazyArray):
8684
pass
8785

8886

89-
cdef class SynchronizedLazyArray(BaseArray):
87+
cdef class LazyPersistentArray(BaseArray):
9088
# TODO
9189
pass
9290

0 commit comments

Comments
 (0)