Skip to content

Commit 3999909

Browse files
authored
Merge branch 'main' into array-metadata-strategy
2 parents f9a3492 + 870265a commit 3999909

File tree

22 files changed

+324
-114
lines changed

22 files changed

+324
-114
lines changed

changes/2755.bugfix.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the
2+
specified fill value, or with zeros if no fill value is provided.
3+
This fixes a bug where Zarr format 2 data with no fill value was written with unpredictable chunk sizes.

changes/2784.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Avoid reading chunks during writes where possible. :issue:`757`

changes/2799.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Initialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types.

changes/2817.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix fancy indexing (e.g. arr[5, [0, 1]]) with the sharding codec

src/zarr/abc/codec.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ async def encode(
357357
@abstractmethod
358358
async def read(
359359
self,
360-
batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
360+
batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
361361
out: NDBuffer,
362362
drop_axes: tuple[int, ...] = (),
363363
) -> None:
@@ -379,7 +379,7 @@ async def read(
379379
@abstractmethod
380380
async def write(
381381
self,
382-
batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
382+
batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
383383
value: NDBuffer,
384384
drop_axes: tuple[int, ...] = (),
385385
) -> None:

src/zarr/api/asynchronous.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,8 @@ async def create(
10651065
async def empty(
10661066
shape: ChunkCoords, **kwargs: Any
10671067
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1068-
"""Create an empty array.
1068+
"""Create an empty array with the specified shape. The contents will be filled with the
1069+
array's fill value or zeros if no fill value is provided.
10691070
10701071
Parameters
10711072
----------
@@ -1087,7 +1088,8 @@ async def empty(
10871088
async def empty_like(
10881089
a: ArrayLike, **kwargs: Any
10891090
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
1090-
"""Create an empty array like `a`.
1091+
"""Create an empty array like `a`. The contents will be filled with the
1092+
array's fill value or zeros if no fill value is provided.
10911093
10921094
Parameters
10931095
----------
@@ -1100,6 +1102,12 @@ async def empty_like(
11001102
-------
11011103
Array
11021104
The new array.
1105+
1106+
Notes
1107+
-----
1108+
The contents of an empty Zarr array are not defined. On attempting to
1109+
retrieve data from an empty Zarr array, any values may be returned,
1110+
and these are not guaranteed to be stable from one access to the next.
11031111
"""
11041112
like_kwargs = _like_args(a, kwargs)
11051113
return await empty(**like_kwargs)

src/zarr/api/synchronous.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,8 @@ def create_array(
902902

903903
# TODO: add type annotations for kwargs
904904
def empty(shape: ChunkCoords, **kwargs: Any) -> Array:
905-
"""Create an empty array.
905+
"""Create an empty array with the specified shape. The contents will be filled with the
906+
array's fill value or zeros if no fill value is provided.
906907
907908
Parameters
908909
----------
@@ -928,7 +929,8 @@ def empty(shape: ChunkCoords, **kwargs: Any) -> Array:
928929
# TODO: move ArrayLike to common module
929930
# TODO: add type annotations for kwargs
930931
def empty_like(a: ArrayLike, **kwargs: Any) -> Array:
931-
"""Create an empty array like another array.
932+
"""Create an empty array like another array. The contents will be filled with the
933+
array's fill value or zeros if no fill value is provided.
932934
933935
Parameters
934936
----------
@@ -941,6 +943,12 @@ def empty_like(a: ArrayLike, **kwargs: Any) -> Array:
941943
-------
942944
Array
943945
The new array.
946+
947+
Notes
948+
-----
949+
The contents of an empty Zarr array are not defined. On attempting to
950+
retrieve data from an empty Zarr array, any values may be returned,
951+
and these are not guaranteed to be stable from one access to the next.
944952
"""
945953
return Array(sync(async_api.empty_like(a, **kwargs)))
946954

src/zarr/codecs/sharding.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,9 @@ async def _decode_single(
455455
chunk_spec,
456456
chunk_selection,
457457
out_selection,
458+
is_complete_shard,
458459
)
459-
for chunk_coords, chunk_selection, out_selection in indexer
460+
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
460461
],
461462
out,
462463
)
@@ -486,7 +487,7 @@ async def _decode_partial_single(
486487
)
487488

488489
indexed_chunks = list(indexer)
489-
all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks}
490+
all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks}
490491

491492
# reading bytes of all requested chunks
492493
shard_dict: ShardMapping = {}
@@ -524,12 +525,17 @@ async def _decode_partial_single(
524525
chunk_spec,
525526
chunk_selection,
526527
out_selection,
528+
is_complete_shard,
527529
)
528-
for chunk_coords, chunk_selection, out_selection in indexer
530+
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
529531
],
530532
out,
531533
)
532-
return out
534+
535+
if hasattr(indexer, "sel_shape"):
536+
return out.reshape(indexer.sel_shape)
537+
else:
538+
return out
533539

534540
async def _encode_single(
535541
self,
@@ -558,8 +564,9 @@ async def _encode_single(
558564
chunk_spec,
559565
chunk_selection,
560566
out_selection,
567+
is_complete_shard,
561568
)
562-
for chunk_coords, chunk_selection, out_selection in indexer
569+
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
563570
],
564571
shard_array,
565572
)
@@ -601,8 +608,9 @@ async def _encode_partial_single(
601608
chunk_spec,
602609
chunk_selection,
603610
out_selection,
611+
is_complete_shard,
604612
)
605-
for chunk_coords, chunk_selection, out_selection in indexer
613+
for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
606614
],
607615
shard_array,
608616
)

src/zarr/core/array.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,8 +1290,9 @@ async def _get_selection(
12901290
self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
12911291
chunk_selection,
12921292
out_selection,
1293+
is_complete_chunk,
12931294
)
1294-
for chunk_coords, chunk_selection, out_selection in indexer
1295+
for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
12951296
],
12961297
out_buffer,
12971298
drop_axes=indexer.drop_axes,
@@ -1417,8 +1418,9 @@ async def _set_selection(
14171418
self.metadata.get_chunk_spec(chunk_coords, _config, prototype),
14181419
chunk_selection,
14191420
out_selection,
1421+
is_complete_chunk,
14201422
)
1421-
for chunk_coords, chunk_selection, out_selection in indexer
1423+
for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
14221424
],
14231425
value_buffer,
14241426
drop_axes=indexer.drop_axes,

src/zarr/core/buffer/cpu.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,10 @@ def create(
154154
order: Literal["C", "F"] = "C",
155155
fill_value: Any | None = None,
156156
) -> Self:
157-
ret = cls(np.empty(shape=tuple(shape), dtype=dtype, order=order))
158-
if fill_value is not None:
159-
ret.fill(fill_value)
160-
return ret
157+
if fill_value is None:
158+
return cls(np.zeros(shape=tuple(shape), dtype=dtype, order=order))
159+
else:
160+
return cls(np.full(shape=tuple(shape), fill_value=fill_value, dtype=dtype, order=order))
161161

162162
@classmethod
163163
def from_numpy_array(cls, array_like: npt.ArrayLike) -> Self:

0 commit comments

Comments
 (0)