Skip to content

Commit a7587b5

Browse files
committed
implement Array.view(); use PermissionError
1 parent 14cd08c commit a7587b5

File tree

9 files changed

+237
-43
lines changed

9 files changed

+237
-43
lines changed

docs/api/core.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ The Array class (``zarr.core``)
88
.. automethod:: __setitem__
99
.. automethod:: resize
1010
.. automethod:: append
11+
.. automethod:: view

zarr/attrs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66

77
from zarr.compat import text_type
8-
from zarr.errors import ReadOnlyError
8+
from zarr.errors import PermissionError
99

1010

1111
class Attributes(MutableMapping):
@@ -31,7 +31,7 @@ def _write_op(self, f, *args, **kwargs):
3131

3232
# guard condition
3333
if self.read_only:
34-
raise ReadOnlyError('attributes are read-only')
34+
raise PermissionError('attributes are read-only')
3535

3636
# synchronization
3737
if self.synchronizer is None:

zarr/core.py

Lines changed: 194 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
from zarr.compressors import get_compressor_cls
1111
from zarr.util import is_total_slice, normalize_array_selection, \
1212
get_chunk_range, human_readable_size, normalize_resize_args, \
13-
normalize_storage_path
13+
normalize_storage_path, normalize_shape, normalize_chunks
1414
from zarr.storage import array_meta_key, attrs_key, listdir, getsize
1515
from zarr.meta import decode_array_metadata, encode_array_metadata
1616
from zarr.attrs import Attributes
17-
from zarr.errors import ReadOnlyError
17+
from zarr.errors import PermissionError
1818
from zarr.compat import reduce
1919
from zarr.filters import get_filters
2020

@@ -59,13 +59,15 @@ class Array(object):
5959
nbytes_stored
6060
initialized
6161
cdata_shape
62+
is_view
6263
6364
Methods
6465
-------
6566
__getitem__
6667
__setitem__
6768
resize
6869
append
70+
view
6971
7072
""" # flake8: noqa
7173

@@ -106,14 +108,16 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
106108
compressor_cls = get_compressor_cls(self._compression)
107109
self._compressor = compressor_cls(self._compression_opts)
108110
self._filters = get_filters(meta['filters'])
109-
# TODO validate filter dtypes
111+
self._is_view = False
110112

111113
# initialize attributes
112114
akey = self._key_prefix + attrs_key
113115
self._attrs = Attributes(store, key=akey, read_only=read_only,
114116
synchronizer=synchronizer)
115117

116118
def _flush_metadata(self):
119+
if self._is_view:
120+
raise PermissionError('operation not permitted for views')
117121
meta = dict(shape=self._shape, chunks=self._chunks, dtype=self._dtype,
118122
compression=self._compression,
119123
compression_opts=self._compression_opts,
@@ -260,12 +264,18 @@ def cdata_shape(self):
260264
int(np.ceil(s / c)) for s, c in zip(self._shape, self._chunks)
261265
)
262266

267+
@property
def is_view(self):
    """bool: Whether this array is a view on another array.

    The flag is False for arrays opened normally and is switched to True
    on the object returned by `view`.
    """
    return self._is_view
271+
263272
def __eq__(self, other):
    """Return True if `other` is an equivalent, non-view array.

    Two arrays compare equal when `other` is an Array with the same
    store, read-only flag and path. A view never compares equal to
    anything (including itself), regardless of which side of the
    comparison it appears on.

    Parameters
    ----------
    other : object
        Object to compare against.

    Returns
    -------
    bool
    """
    return (
        isinstance(other, Array) and
        self.store == other.store and
        self.read_only == other.read_only and
        self.path == other.path and
        # views may reinterpret the stored data, so they are excluded on
        # both sides; checking only `self` would make `a == v` and
        # `v == a` disagree
        not self._is_view and
        not other._is_view
        # N.B., no need to compare other properties, should be covered by
        # store comparison
    )
@@ -469,7 +479,7 @@ def __setitem__(self, key, value):
469479

470480
# guard conditions
471481
if self._read_only:
472-
raise ReadOnlyError('array is read-only')
482+
raise PermissionError('array is read-only')
473483

474484
# normalize selection
475485
selection = normalize_array_selection(key, self._shape)
@@ -735,7 +745,7 @@ def _write_op(self, f, *args, **kwargs):
735745

736746
# guard condition
737747
if self._read_only:
738-
raise ReadOnlyError('array is read-only')
748+
raise PermissionError('array is read-only')
739749

740750
# synchronization
741751
if self._synchronizer is None:
@@ -789,6 +799,10 @@ def _resize_nosync(self, *args):
789799
old_shape = self._shape
790800
new_shape = normalize_resize_args(old_shape, *args)
791801

802+
# update metadata
803+
self._shape = new_shape
804+
self._flush_metadata()
805+
792806
# determine the new number and arrangement of chunks
793807
chunks = self._chunks
794808
new_cdata_shape = tuple(int(np.ceil(s / c))
@@ -803,10 +817,6 @@ def _resize_nosync(self, *args):
803817
else:
804818
del self._chunk_store[self._key_prefix + key]
805819

806-
# update metadata
807-
self._shape = new_shape
808-
self._flush_metadata()
809-
810820
def append(self, data, axis=0):
811821
"""Append `data` to `axis`.
812822
@@ -882,3 +892,177 @@ def _append_nosync(self, data, axis=0):
882892
for i in range(len(self._shape))
883893
)
884894
self[append_selection] = data
895+
896+
def view(self, shape=None, chunks=None, dtype=None,
         fill_value=None, filters=None, read_only=None,
         synchronizer=None):
    """Return a new array object that shares this array's stored data.

    The view is opened on the same store, path and chunk store as this
    array. Selected properties can then be overridden on the view so that
    the same chunk data is interpreted differently. Any argument left as
    None keeps the corresponding setting unchanged.

    Parameters
    ----------
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape.
    dtype : string or dtype, optional
        NumPy dtype.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
        None means "not given", so None cannot be selected as an override.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression. Pass an empty sequence to bypass filters.
    read_only : bool, optional
        True if array should be protected against modification.
    synchronizer : object, optional
        Array synchronizer.

    Returns
    -------
    v : Array
        A new array whose `is_view` property is True.

    Notes
    -----
    WARNING: This is an experimental feature and should be used with care.
    There are plenty of ways to generate errors and/or cause data
    corruption.

    Examples
    --------

    Bypass filters:

        >>> import zarr
        >>> import numpy as np
        >>> np.random.seed(42)
        >>> labels = [b'female', b'male']
        >>> data = np.random.choice(labels, size=10000)
        >>> filters = [zarr.CategoryFilter(labels=labels,
        ...                                dtype=data.dtype,
        ...                                astype='u1')]
        >>> a = zarr.array(data, chunks=1000, compression=None,
        ...                filters=filters)
        >>> a
        zarr.core.Array((10000,), |S6, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 58.6K; nbytes_stored: 10.2K; ratio: 5.7; initialized: 10/10
          filters: category
          store: builtins.dict
        >>> a[:]
        array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
              dtype='|S6')
        >>> v = a.view(dtype='u1', filters=[])
        >>> v
        zarr.core.Array((10000,), uint8, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 9.8K; nbytes_stored: 10.2K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> v.is_view
        True
        >>> v[:]
        array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

    Views can be used to modify data:

        >>> x = v[:]
        >>> x.sort()
        >>> v[:] = x
        >>> v[:]
        array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
        >>> a[:]
        array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
              dtype='|S6')

    View as a different dtype with the same itemsize:

        >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
        >>> a = zarr.array(data, chunks=1000, compression='zlib')
        >>> a
        zarr.core.Array((10000,), uint8, chunks=(1000,), order=C)
          compression: zlib; compression_opts: 1
          nbytes: 9.8K; nbytes_stored: 2.7K; ratio: 3.6; initialized: 10/10
          store: builtins.dict
        >>> a[:]
        array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
        >>> v = a.view(dtype=bool)
        >>> v
        zarr.core.Array((10000,), bool, chunks=(1000,), order=C)
          compression: zlib; compression_opts: 1
          nbytes: 9.8K; nbytes_stored: 2.7K; ratio: 3.6; initialized: 10/10
          store: builtins.dict
        >>> v[:]
        array([False, False,  True, ...,  True, False, False], dtype=bool)
        >>> np.all(a[:].view(dtype=bool) == v[:])
        True

    An array can be viewed with a dtype with a different itemsize, however
    some care is needed to adjust the shape and chunk shape so that chunk
    data is interpreted correctly:

        >>> data = np.arange(10000, dtype='u2')
        >>> a = zarr.array(data, chunks=1000, compression=None)
        >>> a
        zarr.core.Array((10000,), uint16, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 19.5K; nbytes_stored: 19.8K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> a[:10]
        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
        >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
        >>> v
        zarr.core.Array((20000,), uint8, chunks=(2000,), order=C)
          compression: none; compression_opts: None
          nbytes: 19.5K; nbytes_stored: 19.8K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> v[:10]
        array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
        >>> np.all(a[:].view('u1') == v[:])
        True

    Change fill value for uninitialized chunks:

        >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
        >>> a[:]
        array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
        >>> v = a.view(fill_value=42)
        >>> v[:]
        array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

    Note that resizing or appending to views is not permitted:

        >>> a = zarr.empty(10000)
        >>> v = a.view()
        >>> try:
        ...     v.resize(20000)
        ... except PermissionError as e:
        ...     print(e)
        operation not permitted for views

    """  # flake8: noqa

    # access mode and synchronizer are inherited unless given explicitly
    view_read_only = self._read_only if read_only is None else read_only
    view_synchronizer = (self._synchronizer if synchronizer is None
                         else synchronizer)

    # open a second Array over the same store, path and chunk store
    v = Array(store=self._store, path=self._path,
              chunk_store=self._chunk_store, read_only=view_read_only,
              synchronizer=view_synchronizer)
    v._is_view = True

    # apply the requested overrides to the view object only; the effective
    # dtype and shape are tracked because chunk normalization needs them
    if dtype is None:
        effective_dtype = self._dtype
    else:
        effective_dtype = np.dtype(dtype)
        v._dtype = effective_dtype

    if shape is None:
        effective_shape = self._shape
    else:
        effective_shape = normalize_shape(shape)
        v._shape = effective_shape

    if chunks is not None:
        v._chunks = normalize_chunks(chunks, effective_shape,
                                     effective_dtype.itemsize)

    if fill_value is not None:
        v._fill_value = fill_value

    if filters is not None:
        v._filters = filters

    return v

zarr/errors.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,17 @@
22
from __future__ import absolute_import, print_function, division
33

44

5-
class ReadOnlyError(Exception):
6-
pass
5+
from zarr.compat import PY2
6+
7+
8+
if not PY2:

    # Python 3 already provides PermissionError as a builtin; bind it here
    # so that it can be imported from this module on every Python version.
    PermissionError = PermissionError

else:  # pragma: no cover

    # Python 2 has no builtin PermissionError, so define a stand-in.
    class PermissionError(Exception):
        pass
716

817

918
class MetadataError(Exception):

zarr/hierarchy.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from zarr.creation import array, create, empty, zeros, ones, full, \
1515
empty_like, zeros_like, ones_like, full_like
1616
from zarr.util import normalize_storage_path, normalize_shape
17-
from zarr.errors import ReadOnlyError
17+
from zarr.errors import PermissionError
1818
from zarr.meta import decode_group_metadata
1919

2020

@@ -395,7 +395,7 @@ def _write_op(self, f, *args, **kwargs):
395395

396396
# guard condition
397397
if self._read_only:
398-
raise ReadOnlyError('group is read-only')
398+
raise PermissionError('group is read-only')
399399

400400
# synchronization
401401
if self._synchronizer is None:

zarr/tests/test_attrs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from zarr.attrs import Attributes
1111
from zarr.compat import binary_type, text_type
12-
from zarr.errors import ReadOnlyError
12+
from zarr.errors import PermissionError
1313

1414

1515
class TestAttributes(unittest.TestCase):
@@ -80,9 +80,9 @@ def test_read_only(self):
8080
store['attrs'] = json.dumps(dict(foo='bar', baz=42)).encode('ascii')
8181
eq(a['foo'], 'bar')
8282
eq(a['baz'], 42)
83-
with assert_raises(ReadOnlyError):
83+
with assert_raises(PermissionError):
8484
a['foo'] = 'quux'
85-
with assert_raises(ReadOnlyError):
85+
with assert_raises(PermissionError):
8686
del a['foo']
87-
with assert_raises(ReadOnlyError):
87+
with assert_raises(PermissionError):
8888
a.update(foo='quux')

zarr/tests/test_core.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from zarr.storage import DirectoryStore, ZipStore, init_array, init_group
1818
from zarr.core import Array
19-
from zarr.errors import ReadOnlyError
19+
from zarr.errors import PermissionError
2020
from zarr.compat import PY2
2121
from zarr.util import buffer_size
2222
from zarr.filters import DeltaFilter, FixedScaleOffsetFilter
@@ -546,11 +546,11 @@ def test_read_only(self):
546546

547547
z = self.create_array(shape=1000, chunks=100, read_only=True)
548548
assert_true(z.read_only)
549-
with assert_raises(ReadOnlyError):
549+
with assert_raises(PermissionError):
550550
z[:] = 42
551-
with assert_raises(ReadOnlyError):
551+
with assert_raises(PermissionError):
552552
z.resize(2000)
553-
with assert_raises(ReadOnlyError):
553+
with assert_raises(PermissionError):
554554
z.append(np.arange(1000))
555555

556556
def test_pickle(self):

0 commit comments

Comments
 (0)