Skip to content

Commit 08048d3

Browse files
committed
doco
1 parent 18dba11 commit 08048d3

File tree

4 files changed

+187
-18
lines changed

4 files changed

+187
-18
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ over data. Try it with [dask.array](http://dask.pydata.org/en/latest/array.html)
8282
of the same size or larger than chunks. It is not and will never be
8383
optimised for single item access.
8484

85-
Chunks sizes > 1M are generally good. Optimal chunk shape will depend on
85+
Chunk sizes >= 1M are generally good. Optimal chunk shape will depend on
8686
the correlation structure in your data.
8787

8888
## Acknowledgments

setup.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,16 @@
3434
),
3535
])
3636

37+
description = 'A minimal implementation of chunked, compressed, ' \
38+
'N-dimensional arrays for Python.'
39+
40+
with open('README.md') as f:
41+
long_description = f.read()
3742

3843
setup(
3944
name='zarr',
40-
description='A minimal implementation of chunked, compressed, N-dimensional arrays',
41-
long_description='TODO',
45+
description=description,
46+
long_description=long_description,
4247
use_scm_version={
4348
'version_scheme': 'guess-next-dev',
4449
'local_scheme': 'dirty-tag',

zarr/core.py

Lines changed: 139 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,33 @@
1010

1111
def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
1212
synchronized=True):
13-
"""TODO"""
13+
"""Create an empty array.
14+
15+
Parameters
16+
----------
17+
shape : int or tuple of ints
18+
Array shape.
19+
chunks : int or tuple of ints
20+
Chunk shape.
21+
dtype : string or dtype, optional
22+
NumPy dtype.
23+
cname : string, optional
24+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
25+
'snappy'.
26+
clevel : int, optional
27+
Compression level, 0 means no compression.
28+
shuffle : int, optional
29+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
30+
bit shuffle.
31+
synchronized : bool, optional
32+
If True, each chunk will be protected with a lock to prevent data
33+
collision during write operations.
34+
35+
Returns
36+
-------
37+
z : zarr.ext.Array
38+
39+
"""
1440

1541
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
1642
clevel=clevel, shuffle=shuffle,
@@ -19,16 +45,69 @@ def empty(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
1945

2046
def zeros(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
2147
synchronized=True):
22-
"""TODO"""
48+
"""Create an array filled with zeros.
49+
50+
Parameters
51+
----------
52+
shape : int or tuple of ints
53+
Array shape.
54+
chunks : int or tuple of ints
55+
Chunk shape.
56+
dtype : string or dtype, optional
57+
NumPy dtype.
58+
cname : string, optional
59+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
60+
'snappy'.
61+
clevel : int, optional
62+
Compression level, 0 means no compression.
63+
shuffle : int, optional
64+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
65+
bit shuffle.
66+
synchronized : bool, optional
67+
If True, each chunk will be protected with a lock to prevent data
68+
collision during write operations.
69+
70+
Returns
71+
-------
72+
z : zarr.ext.Array
73+
74+
"""
2375

2476
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
2577
clevel=clevel, shuffle=shuffle, fill_value=0,
2678
synchronized=synchronized)
2779

2880

2981
def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
30-
synchronized=True):
31-
"""TODO"""
82+
synchronized=True):
83+
"""Create an array filled with ones.
84+
85+
Parameters
86+
----------
87+
shape : int or tuple of ints
88+
Array shape.
89+
chunks : int or tuple of ints
90+
Chunk shape.
91+
dtype : string or dtype, optional
92+
NumPy dtype.
93+
cname : string, optional
94+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
95+
'snappy'.
96+
clevel : int, optional
97+
Compression level, 0 means no compression.
98+
shuffle : int, optional
99+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
100+
bit shuffle.
101+
synchronized : bool, optional
102+
If True, each chunk will be protected with a lock to prevent data
103+
collision during write operations.
104+
105+
Returns
106+
-------
107+
z : zarr.ext.Array
108+
109+
"""
110+
32111

33112
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
34113
clevel=clevel, shuffle=shuffle, fill_value=1,
@@ -37,7 +116,35 @@ def ones(shape, chunks, dtype=None, cname=None, clevel=None, shuffle=None,
37116

38117
def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
39118
shuffle=None, synchronized=True):
40-
"""TODO"""
119+
"""Create an array filled with `fill_value`.
120+
121+
Parameters
122+
----------
123+
shape : int or tuple of ints
124+
Array shape.
125+
chunks : int or tuple of ints
126+
Chunk shape.
127+
fill_value : object
128+
Default value to use for uninitialised portions of the array.
129+
dtype : string or dtype, optional
130+
NumPy dtype.
131+
cname : string, optional
132+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
133+
'snappy'.
134+
clevel : int, optional
135+
Compression level, 0 means no compression.
136+
shuffle : int, optional
137+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
138+
bit shuffle.
139+
synchronized : bool, optional
140+
If True, each chunk will be protected with a lock to prevent data
141+
collision during write operations.
142+
143+
Returns
144+
-------
145+
z : zarr.ext.Array
146+
147+
"""
41148

42149
return _ext.Array(shape, chunks=chunks, dtype=dtype, cname=cname,
43150
clevel=clevel, shuffle=shuffle, fill_value=fill_value,
@@ -46,7 +153,33 @@ def full(shape, chunks, fill_value, dtype=None, cname=None, clevel=None,
46153

47154
def array(data, chunks=None, dtype=None, cname=None, clevel=None,
48155
shuffle=None, synchronized=True, fill_value=None):
49-
"""TODO"""
156+
"""Create an array filled with `data`.
157+
158+
Parameters
159+
----------
160+
data : array_like
161+
Data to store.
162+
chunks : int or tuple of ints, optional
163+
Chunk shape.
164+
dtype : string or dtype, optional
165+
NumPy dtype.
166+
cname : string, optional
167+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
168+
'snappy'.
169+
clevel : int, optional
170+
Compression level, 0 means no compression.
171+
shuffle : int, optional
172+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
173+
bit shuffle.
174+
synchronized : bool, optional
175+
If True, each chunk will be protected with a lock to prevent data
176+
collision during write operations.
177+
178+
Returns
179+
-------
180+
z : zarr.ext.Array
181+
182+
"""
50183

51184
# ensure data is array-like
52185
if not hasattr(data, 'shape') or not hasattr(data, 'dtype'):

zarr/ext.pyx

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# -*- coding: utf-8 -*-
2+
# cython: embedsignature=True
3+
# cython: profile=True
24
from __future__ import absolute_import, print_function, division
35
from threading import RLock
46
import itertools
@@ -48,6 +50,8 @@ from zarr import defaults
4850

4951

5052
def blosc_version():
53+
"""Return the version of c-blosc that zarr was compiled with."""
54+
5155
# all the 'decode' contortions are for Python 3 returning actual strings
5256
ver_str = <char *> BLOSC_VERSION_STRING
5357
if hasattr(ver_str, "decode"):
@@ -59,6 +63,23 @@ def blosc_version():
5963

6064

6165
def get_cparams(cname=None, clevel=None, shuffle=None):
66+
"""Convenience function to normalise compression parameters.
67+
68+
If any values are None, they will be substituted with values from the
69+
`zarr.defaults` module.
70+
71+
Parameters
72+
----------
73+
cname : string, optional
74+
Name of compression library to use, e.g., 'blosclz', 'lz4', 'zlib',
75+
'snappy'.
76+
clevel : int, optional
77+
Compression level, 0 means no compression.
78+
shuffle : int, optional
79+
Shuffle filter, 0 means no shuffle, 1 means byte shuffle, 2 means
80+
bit shuffle.
81+
82+
"""
6283

6384
# determine compressor
6485
cname = cname if cname is not None else defaults.cname
@@ -84,6 +105,10 @@ def get_cparams(cname=None, clevel=None, shuffle=None):
84105

85106

86107
def is_total_slice(item, shape):
108+
"""Determine whether `item` specifies a complete slice of array with the
109+
given `shape`. Used to optimise __setitem__ operations on the Chunk
110+
class."""
111+
87112
if item == Ellipsis:
88113
return True
89114
if item == slice(None):
@@ -104,11 +129,7 @@ cdef class Chunk:
104129
shuffle=None, fill_value=None):
105130

106131
# set shape and dtype
107-
if isinstance(shape, int):
108-
shape = (shape,)
109-
else:
110-
shape = tuple(shape)
111-
self.shape = shape
132+
self.shape = normalise_shape(shape)
112133
self.dtype = np.dtype(dtype)
113134

114135
# set compression options
@@ -139,7 +160,6 @@ cdef class Chunk:
139160

140161
else:
141162
# ensure array is C contiguous
142-
# TODO adapt to either C or F layout
143163
array = np.ascontiguousarray(value, dtype=self.dtype)
144164
if array.shape != self.shape:
145165
raise ValueError('bad value shape')
@@ -161,7 +181,7 @@ cdef class Chunk:
161181
size_t nbytes, nbytes_check, cbytes, blocksize, itemsize
162182
char *dest
163183

164-
# ensure any existing data is cleared
184+
# ensure any existing data is cleared and memory freed
165185
self.clear()
166186

167187
# compute the total number of bytes in the array
@@ -204,7 +224,7 @@ cdef class Chunk:
204224
array = np.empty(self.shape, dtype=self.dtype)
205225

206226
if self.data == NULL:
207-
# data not initialised
227+
# data not initialised, use fill_value
208228
if self.fill_value is not None:
209229
array.fill(self.fill_value)
210230

@@ -263,6 +283,8 @@ class Synchronized(object):
263283

264284

265285
def normalise_array_selection(item, shape):
286+
"""Convenience function to normalise a selection within an array with
287+
the given `shape`."""
266288

267289
# normalise item
268290
if isinstance(item, int):
@@ -290,6 +312,9 @@ def normalise_array_selection(item, shape):
290312

291313

292314
def normalise_axis_selection(item, l):
315+
"""Convenience function to normalise a selection within a single axis
316+
of size `l`."""
317+
293318
if isinstance(item, int):
294319
if item < 0:
295320
# handle wraparound
@@ -300,7 +325,7 @@ def normalise_axis_selection(item, l):
300325

301326
elif isinstance(item, slice):
302327
if item.step is not None and item.step != 1:
303-
raise NotImplementedError('TODO')
328+
raise NotImplementedError('slice with step not supported')
304329
start = 0 if item.start is None else item.start
305330
stop = l if item.stop is None else item.stop
306331
if start < 0:
@@ -318,12 +343,15 @@ def normalise_axis_selection(item, l):
318343

319344

320345
def get_chunk_range(selection, chunks):
346+
"""Convenience function to get a range over all chunk indices,
347+
for iterating over chunks."""
321348
chunk_range = [range(start//l, int(np.ceil(stop/l)))
322349
for (start, stop), l in zip(selection, chunks)]
323350
return chunk_range
324351

325352

326353
def normalise_shape(shape):
354+
"""Convenience function to normalise the `shape` argument."""
327355
if isinstance(shape, int):
328356
shape = (shape,)
329357
else:
@@ -332,11 +360,14 @@ def normalise_shape(shape):
332360

333361

334362
def normalise_chunks(chunks, shape):
363+
"""Convenience function to normalise the `chunks` argument for an array
364+
with the given `shape`."""
335365
if isinstance(chunks, int):
336366
chunks = (chunks,)
337367
else:
338368
chunks = tuple(chunks)
339369
if len(chunks) < len(shape):
370+
# chunks not given for trailing dimensions: use the full dimension length
340371
chunks += shape[len(chunks):]
341372
if len(chunks) != len(shape):
342373
raise ValueError('chunks and shape not compatible: %r, %r' %

0 commit comments

Comments
 (0)