23
23
from cubed .primitive .memory import get_buffer_copies
24
24
from cubed .primitive .rechunk import rechunk as primitive_rechunk
25
25
from cubed .spec import spec_from_config
26
- from cubed .storage .backend import open_backend_array
26
+ from cubed .storage .backend import is_backend_storage_array , open_backend_array
27
27
from cubed .storage .zarr import lazy_zarr_array
28
28
from cubed .types import T_RegularChunks , T_Shape
29
29
from cubed .utils import (
@@ -125,7 +125,13 @@ def from_zarr(store, path=None, spec=None) -> "Array":
125
125
return Array (name , target , spec , plan )
126
126
127
127
128
- def store (sources : Union ["Array" , Sequence ["Array" ]], targets , executor = None , ** kwargs ):
128
+ def store (
129
+ sources : Union ["Array" , Sequence ["Array" ]],
130
+ targets ,
131
+ regions : tuple [slice , ...] | list [tuple [slice , ...]] | None = None ,
132
+ executor = None ,
133
+ ** kwargs ,
134
+ ):
129
135
"""Save source arrays to array-like objects.
130
136
131
137
In the current implementation ``targets`` must be Zarr arrays.
@@ -135,10 +141,12 @@ def store(sources: Union["Array", Sequence["Array"]], targets, executor=None, **
135
141
136
142
Parameters
137
143
----------
138
- x : cubed.Array or collection of cubed.Array
144
+ sources : cubed.Array or collection of cubed.Array
139
145
Arrays to save
140
- store : zarr.Array or collection of zarr.Array
146
+ targets : string or Zarr store or collection of strings or Zarr stores
141
147
Zarr arrays to write to
148
+ regions : tuple of slices or list of tuple of slices, optional
149
+ The regions of data that should be written to in targets.
142
150
executor : cubed.runtime.types.Executor, optional
143
151
The executor to use to run the computation.
144
152
Defaults to using the in-process Python executor.
@@ -155,19 +163,38 @@ def store(sources: Union["Array", Sequence["Array"]], targets, executor=None, **
155
163
f"Different number of sources ({ len (sources )} ) and targets ({ len (targets )} )"
156
164
)
157
165
166
+ if isinstance (regions , tuple ) or regions is None :
167
+ regions_list = [regions ] * len (sources )
168
+ else :
169
+ regions_list = list (regions )
170
+ if len (sources ) != len (regions_list ):
171
+ raise ValueError (
172
+ f"Different number of sources [{ len (sources )} ] and "
173
+ f"targets [{ len (targets )} ] than regions [{ len (regions_list )} ]"
174
+ )
175
+
158
176
arrays = []
159
- for source , target in zip (sources , targets ):
160
- identity = lambda a : a
161
- ind = tuple (range (source .ndim ))
177
+ for source , target , region in zip (sources , targets , regions_list ):
178
+ array = _store_array (source , target , region = region )
179
+ arrays .append (array )
180
+ compute (* arrays , executor = executor , _return_in_memory_array = False , ** kwargs )
162
181
163
- if target is not None and not isinstance (target , zarr .Array ):
164
- target = lazy_zarr_array (
165
- target ,
166
- shape = source .shape ,
167
- dtype = source .dtype ,
168
- chunks = source .chunksize ,
169
- )
170
- array = blockwise (
182
+
183
def _store_array(source: "Array", target, path=None, region=None):
    """Build the array that copies ``source`` into ``target``.

    Nothing is computed here; the caller is responsible for computing the
    returned array.

    Parameters
    ----------
    source : cubed.Array
        Array to save.
    target : storage array, store-like object or None
        Where to write. A target that ``is_backend_storage_array`` does not
        recognise is wrapped with ``lazy_zarr_array`` using the shape, dtype
        and chunk size of ``source``.
    path : string, optional
        Group path, forwarded to ``lazy_zarr_array``.
    region : tuple of slices, optional
        The region of ``target`` to write to. ``None``, or a tuple made up
        only of ``slice(None)``, writes the whole array. Open-ended and
        negative bounds are resolved against the target shape.

    Returns
    -------
    cubed.Array
        The array whose computation performs the write.

    Raises
    ------
    ValueError
        If a region is given without a target, if the region does not start
        and stop on chunk boundaries of the target (a stop at the end of a
        dimension is always allowed), or if the shape of ``source`` differs
        from the shape selected by ``region``.
    """
    if target is not None and not is_backend_storage_array(target):
        target = lazy_zarr_array(
            target,
            shape=source.shape,
            dtype=source.dtype,
            chunks=source.chunksize,
            path=path,
        )
    if target is None and region is not None:
        raise ValueError("Target store must be specified when setting a region")
    identity = lambda a: a
    if region is None or all(r == slice(None) for r in region):
        # whole-array write: a plain one-to-one blockwise copy
        ind = tuple(range(source.ndim))
        # NOTE(review): the two arguments after ``source`` were hidden by a
        # diff hunk boundary; they mirror the call this helper replaced.
        return blockwise(
            identity,
            ind,
            source,
            ind,
            dtype=source.dtype,
            align_arrays=False,
            target_store=target,
        )

    # treat a region as an offset within the target store
    shape = target.shape
    chunks = target.chunks
    # Resolve ``None`` and negative bounds against the target shape first, so
    # that a partially open region such as ``(slice(0, 4), slice(None))`` can
    # be checked and offset with plain integer arithmetic.
    bounds = [sl.indices(dim)[:2] for sl, dim in zip(region, shape)]
    for (start, stop), cs, dim in zip(bounds, chunks, shape):
        if start % cs != 0 or (stop % cs != 0 and stop != dim):
            raise ValueError(
                f"Region {region} does not align with target chunks {chunks}"
            )
    block_offsets = [start // cs for (start, _), cs in zip(bounds, chunks)]

    def key_function(out_key):
        # shift the target block coordinates back by the region offset to
        # find the source block that supplies the data
        out_coords = out_key[1:]
        in_coords = tuple(bi - off for bi, off in zip(out_coords, block_offsets))
        return ((source.name, *in_coords),)

    # calculate output block ids from region selection
    indexer = _create_zarr_indexer(region, shape, chunks)
    if source.shape != indexer.shape:
        raise ValueError(
            f"Source array shape {source.shape} does not match region shape {indexer.shape}"
        )
    # TODO(#800): make Zarr indexer pickle-able so we don't have to materialize all the block IDs
    output_blocks = map(
        lambda chunk_projection: list(chunk_projection[0]), list(indexer)
    )

    out = general_blockwise(
        identity,
        key_function,
        source,
        shapes=[shape],
        dtypes=[source.dtype],
        chunkss=[chunks],
        target_stores=[target],
        output_blocks=output_blocks,
    )
    from cubed import Array

    assert isinstance(out, Array)  # single output
    return out
182
247
183
- def to_zarr (x : "Array" , store , path = None , executor = None , ** kwargs ):
248
+
249
def to_zarr(x: "Array", store, path=None, region=None, executor=None, **kwargs):
    """Save an array to Zarr storage.

    This operation is eager: the computation is run before the function
    returns.

    Parameters
    ----------
    x : cubed.Array
        Array to save
    store : string or Zarr store
        Output Zarr store
    path : string, optional
        Group path
    region : tuple of slices, optional
        The region of data that should be written to in target.
    executor : cubed.runtime.types.Executor, optional
        The executor to use to run the computation.
        Defaults to using the in-process Python executor.
    **kwargs
        Extra keyword arguments, forwarded unchanged to ``compute``.
    """
    # Describe the write first, then run it straight away without pulling
    # the result back into memory.
    stored = _store_array(x, store, path=path, region=region)
    stored.compute(executor=executor, _return_in_memory_array=False, **kwargs)
223
271
224
272
@@ -466,7 +514,7 @@ def _general_blockwise(
466
514
spec = check_array_specs (arrays )
467
515
buffer_copies = get_buffer_copies (spec )
468
516
469
- if isinstance (target_stores , list ): # multiple outputs
517
+ if isinstance (target_stores , list ) and len ( target_stores ) > 1 : # multiple outputs
470
518
name = [gensym () for _ in range (len (target_stores ))]
471
519
target_stores = [
472
520
ts if ts is not None else context_dir_path (spec = spec )
0 commit comments