10
10
from zarr .compressors import get_compressor_cls
11
11
from zarr .util import is_total_slice , normalize_array_selection , \
12
12
get_chunk_range , human_readable_size , normalize_resize_args , \
13
- normalize_storage_path
13
+ normalize_storage_path , normalize_shape , normalize_chunks
14
14
from zarr .storage import array_meta_key , attrs_key , listdir , getsize
15
15
from zarr .meta import decode_array_metadata , encode_array_metadata
16
16
from zarr .attrs import Attributes
17
- from zarr .errors import ReadOnlyError
17
+ from zarr .errors import PermissionError
18
18
from zarr .compat import reduce
19
19
from zarr .filters import get_filters
20
20
@@ -59,13 +59,15 @@ class Array(object):
59
59
nbytes_stored
60
60
initialized
61
61
cdata_shape
62
+ is_view
62
63
63
64
Methods
64
65
-------
65
66
__getitem__
66
67
__setitem__
67
68
resize
68
69
append
70
+ view
69
71
70
72
""" # flake8: noqa
71
73
@@ -106,14 +108,16 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
106
108
compressor_cls = get_compressor_cls (self ._compression )
107
109
self ._compressor = compressor_cls (self ._compression_opts )
108
110
self ._filters = get_filters (meta ['filters' ])
109
- # TODO validate filter dtypes
111
+ self . _is_view = False
110
112
111
113
# initialize attributes
112
114
akey = self ._key_prefix + attrs_key
113
115
self ._attrs = Attributes (store , key = akey , read_only = read_only ,
114
116
synchronizer = synchronizer )
115
117
116
118
def _flush_metadata (self ):
119
+ if self ._is_view :
120
+ raise PermissionError ('operation not permitted for views' )
117
121
meta = dict (shape = self ._shape , chunks = self ._chunks , dtype = self ._dtype ,
118
122
compression = self ._compression ,
119
123
compression_opts = self ._compression_opts ,
@@ -260,12 +264,18 @@ def cdata_shape(self):
260
264
int (np .ceil (s / c )) for s , c in zip (self ._shape , self ._chunks )
261
265
)
262
266
267
@property
def is_view(self):
    """Flag telling whether this array is a view on another array.

    The value is False for an array built through the constructor and is
    set to True on the array handed back by `view`.
    """
    return self._is_view
271
+
263
272
def __eq__(self, other):
    """Return True if `other` is an equivalent handle on the same array.

    Two arrays compare equal when `other` is an `Array`, both use equal
    stores, carry the same read-only flag and path, and neither of them is
    a view.

    Parameters
    ----------
    other : object
        Object to compare against.

    Returns
    -------
    bool
        Result of the comparison; always False if either operand is a view.

    """
    return (
        isinstance(other, Array) and
        self.store == other.store and
        self.read_only == other.read_only and
        self.path == other.path and
        # A view can override shape, dtype, chunks, fill value and filters
        # in memory only, so the store comparison cannot vouch for it.
        # Both operands are checked so that ``a == v`` and ``v == a`` agree.
        not self._is_view and
        not other._is_view
        # N.B., no need to compare other properties, should be covered by
        # store comparison
    )
@@ -469,7 +479,7 @@ def __setitem__(self, key, value):
469
479
470
480
# guard conditions
471
481
if self ._read_only :
472
- raise ReadOnlyError ('array is read-only' )
482
+ raise PermissionError ('array is read-only' )
473
483
474
484
# normalize selection
475
485
selection = normalize_array_selection (key , self ._shape )
@@ -735,7 +745,7 @@ def _write_op(self, f, *args, **kwargs):
735
745
736
746
# guard condition
737
747
if self ._read_only :
738
- raise ReadOnlyError ('array is read-only' )
748
+ raise PermissionError ('array is read-only' )
739
749
740
750
# synchronization
741
751
if self ._synchronizer is None :
@@ -789,6 +799,10 @@ def _resize_nosync(self, *args):
789
799
old_shape = self ._shape
790
800
new_shape = normalize_resize_args (old_shape , * args )
791
801
802
+ # update metadata
803
+ self ._shape = new_shape
804
+ self ._flush_metadata ()
805
+
792
806
# determine the new number and arrangement of chunks
793
807
chunks = self ._chunks
794
808
new_cdata_shape = tuple (int (np .ceil (s / c ))
@@ -803,10 +817,6 @@ def _resize_nosync(self, *args):
803
817
else :
804
818
del self ._chunk_store [self ._key_prefix + key ]
805
819
806
- # update metadata
807
- self ._shape = new_shape
808
- self ._flush_metadata ()
809
-
810
820
def append (self , data , axis = 0 ):
811
821
"""Append `data` to `axis`.
812
822
@@ -882,3 +892,177 @@ def _append_nosync(self, data, axis=0):
882
892
for i in range (len (self ._shape ))
883
893
)
884
894
self [append_selection ] = data
895
+
896
def view(self, shape=None, chunks=None, dtype=None,
         fill_value=None, filters=None, read_only=None,
         synchronizer=None):
    """Return a new array object backed by the same stored data.

    The returned array is opened on the same store, chunk store and path
    as this array and is flagged as a view. Any of the properties below
    may be overridden on the returned object; overrides are held in memory
    on the view only.

    Parameters
    ----------
    shape : int or tuple of ints, optional
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape.
    dtype : string or dtype, optional
        NumPy dtype.
    fill_value : object, optional
        Default value to use for uninitialized portions of the array.
    filters : sequence, optional
        Sequence of filters to use to encode chunk data prior to
        compression.
    read_only : bool, optional
        True if array should be protected against modification. If not
        given, the setting of this array is used.
    synchronizer : object, optional
        Array synchronizer. If not given, the synchronizer of this array
        is used.

    Returns
    -------
    Array
        An array whose `is_view` property is True.

    Notes
    -----
    WARNING: This is an experimental feature and should be used with care.
    There are plenty of ways to generate errors and/or cause data
    corruption.

    Examples
    --------

    Bypass filters:

        >>> import zarr
        >>> import numpy as np
        >>> np.random.seed(42)
        >>> labels = [b'female', b'male']
        >>> data = np.random.choice(labels, size=10000)
        >>> filters = [zarr.CategoryFilter(labels=labels,
        ...                                dtype=data.dtype,
        ...                                astype='u1')]
        >>> a = zarr.array(data, chunks=1000, compression=None,
        ...                filters=filters)
        >>> a
        zarr.core.Array((10000,), |S6, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 58.6K; nbytes_stored: 10.2K; ratio: 5.7; initialized: 10/10
          filters: category
          store: builtins.dict
        >>> a[:]
        array([b'female', b'male', b'female', ..., b'male', b'male', b'female'],
              dtype='|S6')
        >>> v = a.view(dtype='u1', filters=[])
        >>> v
        zarr.core.Array((10000,), uint8, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 9.8K; nbytes_stored: 10.2K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> v.is_view
        True
        >>> v[:]
        array([1, 2, 1, ..., 2, 2, 1], dtype=uint8)

    Views can be used to modify data:

        >>> x = v[:]
        >>> x.sort()
        >>> v[:] = x
        >>> v[:]
        array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)
        >>> a[:]
        array([b'female', b'female', b'female', ..., b'male', b'male', b'male'],
              dtype='|S6')

    View as a different dtype with the same itemsize:

        >>> data = np.random.randint(0, 2, size=10000, dtype='u1')
        >>> a = zarr.array(data, chunks=1000, compression='zlib')
        >>> a
        zarr.core.Array((10000,), uint8, chunks=(1000,), order=C)
          compression: zlib; compression_opts: 1
          nbytes: 9.8K; nbytes_stored: 2.7K; ratio: 3.6; initialized: 10/10
          store: builtins.dict
        >>> a[:]
        array([0, 0, 1, ..., 1, 0, 0], dtype=uint8)
        >>> v = a.view(dtype=bool)
        >>> v
        zarr.core.Array((10000,), bool, chunks=(1000,), order=C)
          compression: zlib; compression_opts: 1
          nbytes: 9.8K; nbytes_stored: 2.7K; ratio: 3.6; initialized: 10/10
          store: builtins.dict
        >>> v[:]
        array([False, False,  True, ...,  True, False, False], dtype=bool)
        >>> np.all(a[:].view(dtype=bool) == v[:])
        True

    An array can be viewed with a dtype with a different itemsize, however
    some care is needed to adjust the shape and chunk shape so that chunk
    data is interpreted correctly:

        >>> data = np.arange(10000, dtype='u2')
        >>> a = zarr.array(data, chunks=1000, compression=None)
        >>> a
        zarr.core.Array((10000,), uint16, chunks=(1000,), order=C)
          compression: none; compression_opts: None
          nbytes: 19.5K; nbytes_stored: 19.8K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> a[:10]
        array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16)
        >>> v = a.view(dtype='u1', shape=20000, chunks=2000)
        >>> v
        zarr.core.Array((20000,), uint8, chunks=(2000,), order=C)
          compression: none; compression_opts: None
          nbytes: 19.5K; nbytes_stored: 19.8K; ratio: 1.0; initialized: 10/10
          store: builtins.dict
        >>> v[:10]
        array([0, 0, 1, 0, 2, 0, 3, 0, 4, 0], dtype=uint8)
        >>> np.all(a[:].view('u1') == v[:])
        True

    Change fill value for uninitialized chunks:

        >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1')
        >>> a[:]
        array([-1, -1, -1, ..., -1, -1, -1], dtype=int8)
        >>> v = a.view(fill_value=42)
        >>> v[:]
        array([42, 42, 42, ..., 42, 42, 42], dtype=int8)

    Note that resizing or appending to views is not permitted:

        >>> a = zarr.empty(10000)
        >>> v = a.view()
        >>> try:
        ...     v.resize(20000)
        ... except PermissionError as e:
        ...     print(e)
        operation not permitted for views

    """  # flake8: noqa

    # settings not supplied by the caller are inherited from this array
    effective_read_only = self._read_only if read_only is None else read_only
    effective_sync = (self._synchronizer if synchronizer is None
                      else synchronizer)

    # open a second handle on the same stores and path, flagged as a view
    alias = Array(store=self._store, path=self._path,
                  chunk_store=self._chunk_store,
                  read_only=effective_read_only,
                  synchronizer=effective_sync)
    alias._is_view = True

    # allow override of some properties; the effective dtype and shape are
    # kept in locals because chunk normalization below depends on them
    if dtype is not None:
        effective_dtype = np.dtype(dtype)
        alias._dtype = effective_dtype
    else:
        effective_dtype = self._dtype

    if shape is not None:
        effective_shape = normalize_shape(shape)
        alias._shape = effective_shape
    else:
        effective_shape = self._shape

    if chunks is not None:
        alias._chunks = normalize_chunks(chunks, effective_shape,
                                         effective_dtype.itemsize)

    if fill_value is not None:
        alias._fill_value = fill_value

    if filters is not None:
        alias._filters = filters

    return alias
0 commit comments