Skip to content
8 changes: 4 additions & 4 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zarr.api.asynchronous as async_api
import zarr.core.array
from zarr._compat import _deprecate_positional_args
from zarr.core.array import Array, AsyncArray, CompressorLike
from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike
from zarr.core.group import Group
from zarr.core.sync import sync
from zarr.core.sync_group import create_hierarchy
Expand Down Expand Up @@ -606,7 +606,7 @@ def create(
chunks: ChunkCoords | int | bool | None = None,
dtype: ZDTypeLike | None = None,
compressor: CompressorLike = "auto",
fill_value: Any | None = None, # TODO: need type
fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For posterity: we now have a type alias for the scalar types of all the dtypes, which we can use instead of `Any` here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although maybe we should just use `object`, since we do want to allow castable inputs.

order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
synchronizer: Any | None = None,
Expand Down Expand Up @@ -763,7 +763,7 @@ def create_array(
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -929,7 +929,7 @@ def from_array(
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down
70 changes: 49 additions & 21 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,32 @@


# Array and AsyncArray are defined in the base ``zarr`` namespace
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
__all__ = [
"DEFAULT_FILL_VALUE",
"DefaultFillValue",
"create_codec_pipeline",
"parse_array_metadata",
]

logger = getLogger(__name__)


class DefaultFillValue:
    """
    Sentinel class to indicate that the default fill value should be used.

    This class exists because the normal values used to convey "defaultness"
    like ``None`` or ``"auto"`` do not work for the fill value parameter of a Zarr array.
    ``None`` is a valid fill value for Zarr V2 (resulting in ``"fill_value: null"`` in array metadata).
    A string like ``"auto"`` is a valid fill value for an array with a string data type.
    But an instance of this class lies outside the space of valid fill values, which makes it
    suitable for expressing that the default fill value should be used.
    """

    def __repr__(self) -> str:
        # Stable, address-free repr: this object appears as a default in function
        # signatures, so the inherited ``<... object at 0x...>`` form would change
        # from one process to the next wherever those signatures are rendered.
        return "<DefaultFillValue>"


# Module-level sentinel instance used as the default for ``fill_value`` parameters.
DEFAULT_FILL_VALUE = DefaultFillValue()


def parse_array_metadata(data: Any) -> ArrayMetadata:
if isinstance(data, ArrayMetadata):
return data
Expand Down Expand Up @@ -296,7 +317,7 @@ async def create(
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[2],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
chunks: ShapeLike | None = None,
dimension_separator: Literal[".", "/"] | None = None,
Expand All @@ -320,7 +341,7 @@ async def create(
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -348,7 +369,7 @@ async def create(
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3] = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -376,7 +397,7 @@ async def create(
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -411,7 +432,7 @@ async def create(
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -552,7 +573,7 @@ async def _create(
shape: ShapeLike,
dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar],
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -673,7 +694,7 @@ def _create_metadata_v3(
shape: ShapeLike,
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: DimensionNames = None,
Expand All @@ -698,8 +719,9 @@ def _create_metadata_v3(
else:
chunk_key_encoding_parsed = chunk_key_encoding

if fill_value is None:
# v3 spec will not allow a null fill value
if isinstance(fill_value, DefaultFillValue) or fill_value is None:
# The DefaultFillValue sentinel resolves to the dtype's default scalar.
# For v3, None is handled the same way, because the v3 spec does not allow a null fill value.
fill_value_parsed = dtype.default_scalar()
else:
fill_value_parsed = fill_value
Expand All @@ -725,7 +747,7 @@ async def _create_v3(
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
config: ArrayConfig,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: (
ChunkKeyEncodingLike
| tuple[Literal["default"], Literal[".", "/"]]
Expand Down Expand Up @@ -774,22 +796,28 @@ def _create_metadata_v2(
chunks: ChunkCoords,
order: MemoryOrder,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLikev2 = None,
attributes: dict[str, JSON] | None = None,
) -> ArrayV2Metadata:
if dimension_separator is None:
dimension_separator = "."
if fill_value is None:
fill_value = dtype.default_scalar() # type: ignore[assignment]

# Handle DefaultFillValue sentinel
if isinstance(fill_value, DefaultFillValue):
fill_value_parsed: Any = dtype.default_scalar()
else:
# For v2, preserve None as-is (backward compatibility)
fill_value_parsed = fill_value

return ArrayV2Metadata(
shape=shape,
dtype=dtype,
chunks=chunks,
order=order,
dimension_separator=dimension_separator,
fill_value=fill_value,
fill_value=fill_value_parsed,
compressor=compressor,
filters=filters,
attributes=attributes,
Expand All @@ -806,7 +834,7 @@ async def _create_v2(
order: MemoryOrder,
config: ArrayConfig,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLike = "auto",
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -1750,7 +1778,7 @@ def create(
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -1879,7 +1907,7 @@ def _create(
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -3836,7 +3864,7 @@ async def from_array(
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4098,7 +4126,7 @@ async def init_array(
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4319,7 +4347,7 @@ async def create_array(
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down
22 changes: 22 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None:
else:
assert a.fill_value == dtype.default_scalar()

@staticmethod
# NOTE(review): ``zarr_format`` is not parametrized on this test; presumably it is
# supplied by a shared fixture -- confirm.
@pytest.mark.parametrize("dtype", zdtype_examples)
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
def test_default_fill_value_None(
    dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat
) -> None:
    """
    Check how an explicit ``fill_value=None`` is resolved when creating an array.

    For Zarr Format 2 the fill value must stay ``None`` (null); for Zarr Format 3 it
    must equal the data type's default scalar.
    """
    a = zarr.create_array(
        store, shape=(5,), chunks=(5,), dtype=dtype, fill_value=None, zarr_format=zarr_format
    )

    if zarr_format == 2:
        # An explicit None is kept as-is for v2.
        assert a.fill_value is None
    elif zarr_format == 3:
        expected = dtype.default_scalar()
        observed = a.fill_value
        is_time_dtype = isinstance(dtype, DateTime64 | TimeDelta64)
        if is_time_dtype and np.isnat(observed):
            # NaT never compares equal to itself, so compare "NaT-ness" instead.
            assert np.isnat(expected)
        else:
            assert observed == expected

@staticmethod
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@pytest.mark.parametrize("dtype", zdtype_examples)
Expand Down
Loading