Skip to content

Commit 723e716

Browse files
committed
add ArrayConfig
1 parent d2c7838 commit 723e716

File tree

11 files changed

+179
-75
lines changed

11 files changed

+179
-75
lines changed

src/zarr/api/asynchronous.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
ChunkCoords,
1717
MemoryOrder,
1818
ZarrFormat,
19+
_warn_order_kwarg,
1920
_warn_write_empty_chunks_kwarg,
2021
)
2122
from zarr.core.config import config
@@ -725,6 +726,7 @@ async def create(
725726
read_only: bool | None = None,
726727
object_codec: Codec | None = None, # TODO: type has changed
727728
dimension_separator: Literal[".", "/"] | None = None,
729+
write_empty_chunks: bool | None = None,
728730
zarr_version: ZarrFormat | None = None, # deprecated
729731
zarr_format: ZarrFormat | None = None,
730732
meta_array: Any | None = None, # TODO: need type
@@ -760,6 +762,7 @@ async def create(
760762
fill_value : object
761763
Default value to use for uninitialized portions of the array.
762764
order : {'C', 'F'}, optional
765+
Deprecated in favor of the `array.order` configuration variable.
763766
Memory layout to be used within each chunk.
764767
Default is set in Zarr's config (`array.order`).
765768
store : Store or str
@@ -794,6 +797,19 @@ async def create(
794797
795798
.. versionadded:: 2.8
796799
800+
write_empty_chunks : bool, optional
801+
Deprecated in favor of the `array.write_empty_chunks` configuration variable.
802+
803+
If True (default), all chunks will be stored regardless of their
804+
contents. If False, each chunk is compared to the array's fill value
805+
prior to storing. If a chunk is uniformly equal to the fill value, then
806+
that chunk is not stored, and the store entry for that chunk's key
807+
is deleted. This setting enables sparser storage, as only chunks with
808+
non-fill-value data are stored, at the expense of overhead associated
809+
with checking the data of each chunk.
810+
811+
.. versionadded:: 2.11
812+
797813
zarr_format : {2, 3, None}, optional
798814
The zarr format to use when saving.
799815
meta_array : array-like, optional
@@ -839,17 +855,11 @@ async def create(
839855
raise ValueError(
840856
"dimension_separator is not supported for zarr format 3, use chunk_key_encoding instead"
841857
)
842-
else:
843-
warnings.warn(
844-
"dimension_separator is not yet implemented",
845-
RuntimeWarning,
846-
stacklevel=2,
847-
)
848858

849-
if "write_empty_chunks" in kwargs:
850-
# warn users if the write_empty_chunks kwarg was used
851-
write_empty_chunks = kwargs.pop("write_empty_chunks")
852-
_warn_write_empty_chunks_kwarg(write_empty_chunks)
859+
if order is not None:
860+
_warn_order_kwarg()
861+
if write_empty_chunks is not None:
862+
_warn_write_empty_chunks_kwarg()
853863

854864
if meta_array is not None:
855865
warnings.warn("meta_array is not yet implemented", RuntimeWarning, stacklevel=2)
@@ -878,6 +888,7 @@ async def create(
878888
dimension_names=dimension_names,
879889
attributes=attributes,
880890
order=order,
891+
write_empty_chunks=write_empty_chunks,
881892
**kwargs,
882893
)
883894

@@ -1051,10 +1062,10 @@ async def open_array(
10511062

10521063
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)
10531064

1065+
if "order" in kwargs:
1066+
_warn_order_kwarg()
10541067
if "write_empty_chunks" in kwargs:
1055-
# warn users if the write_empty_chunks kwarg was used
1056-
write_empty_chunks = kwargs.pop("write_empty_chunks")
1057-
_warn_write_empty_chunks_kwarg(write_empty_chunks)
1068+
_warn_write_empty_chunks_kwarg()
10581069

10591070
try:
10601071
return await AsyncArray.open(store_path, zarr_format=zarr_format)

src/zarr/codecs/pipeline.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,6 @@ async def write_batch(
331331
value: NDBuffer,
332332
drop_axes: tuple[int, ...] = (),
333333
) -> None:
334-
write_empty_chunks = config.get("array.write_empty_chunks") == True # noqa: E712
335334
if self.supports_partial_encode:
336335
await self.encode_partial_batch(
337336
[
@@ -385,7 +384,9 @@ async def _read_key(
385384
if chunk_array is None:
386385
chunk_array_batch.append(None) # type: ignore[unreachable]
387386
else:
388-
if not write_empty_chunks and chunk_array.all_equal(chunk_spec.fill_value):
387+
if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal(
388+
chunk_spec.fill_value
389+
):
389390
chunk_array_batch.append(None)
390391
else:
391392
chunk_array_batch.append(chunk_array)

src/zarr/codecs/sharding.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from zarr.abc.store import ByteGetter, ByteRangeRequest, ByteSetter
2121
from zarr.codecs.bytes import BytesCodec
2222
from zarr.codecs.crc32c_ import Crc32cCodec
23-
from zarr.core.array_spec import ArraySpec
23+
from zarr.core.array_spec import ArrayConfig, ArraySpec
2424
from zarr.core.buffer import (
2525
Buffer,
2626
BufferPrototype,
@@ -665,7 +665,9 @@ def _get_index_chunk_spec(self, chunks_per_shard: ChunkCoords) -> ArraySpec:
665665
shape=chunks_per_shard + (2,),
666666
dtype=np.dtype("<u8"),
667667
fill_value=MAX_UINT_64,
668-
order="C", # Note: this is hard-coded for simplicity -- it is not surfaced into user code
668+
config=ArrayConfig(
669+
order="C", write_empty_chunks=False
670+
), # Note: this is hard-coded for simplicity -- it is not surfaced into user code
669671
prototype=numpy_buffer_prototype(),
670672
)
671673

@@ -674,7 +676,7 @@ def _get_chunk_spec(self, shard_spec: ArraySpec) -> ArraySpec:
674676
shape=self.chunk_shape,
675677
dtype=shard_spec.dtype,
676678
fill_value=shard_spec.fill_value,
677-
order=shard_spec.order,
679+
config=shard_spec.config,
678680
prototype=shard_spec.prototype,
679681
)
680682

src/zarr/codecs/transpose.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec:
8484
shape=tuple(chunk_spec.shape[self.order[i]] for i in range(chunk_spec.ndim)),
8585
dtype=chunk_spec.dtype,
8686
fill_value=chunk_spec.fill_value,
87-
order=chunk_spec.order,
87+
config=chunk_spec.config,
8888
prototype=chunk_spec.prototype,
8989
)
9090

0 commit comments

Comments
 (0)