diff --git a/changes/3198.bugfix.rst b/changes/3198.bugfix.rst new file mode 100644 index 0000000000..840996641c --- /dev/null +++ b/changes/3198.bugfix.rst @@ -0,0 +1,4 @@ +Restores the ability to create a Zarr V2 array with a ``null`` fill value by introducing a new +class ``DefaultFillValue``, and setting the default value of the ``fill_value`` parameter in array +creation routines to an instance of ``DefaultFillValue``. For Zarr V3 arrays, ``None`` will act as an +alias for a ``DefaultFillValue`` instance, thus preserving compatibility with existing code. \ No newline at end of file diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index f2dc8757d6..b60f69a673 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -7,7 +7,7 @@ import zarr.api.asynchronous as async_api import zarr.core.array from zarr._compat import _deprecate_positional_args -from zarr.core.array import Array, AsyncArray, CompressorLike +from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike from zarr.core.group import Group from zarr.core.sync import sync from zarr.core.sync_group import create_hierarchy @@ -606,7 +606,7 @@ def create( chunks: ChunkCoords | int | bool | None = None, dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", - fill_value: Any | None = None, # TODO: need type + fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, @@ -763,7 +763,7 @@ def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, @@ -929,7 +929,7 @@ def from_array( filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | 
Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = None, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index cd6b33a28c..a44a4b55d1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -138,11 +138,34 @@ # Array and AsyncArray are defined in the base ``zarr`` namespace -__all__ = ["create_codec_pipeline", "parse_array_metadata"] +__all__ = [ + "DEFAULT_FILL_VALUE", + "DefaultFillValue", + "create_codec_pipeline", + "parse_array_metadata", +] logger = getLogger(__name__) +class DefaultFillValue: + """ + Sentinel class to indicate that the default fill value should be used. + + This class exists because conventional values used to convey "defaultness" like ``None`` or + ``"auto"`` are ambiguous when specifying the fill value parameter of a Zarr array. + The value ``None`` is ambiguous because it is a valid fill value for Zarr V2 + (resulting in ``"fill_value": null`` in array metadata). + A string like ``"auto"`` is ambiguous because such a string is a valid fill value for an array + with a string data type. + An instance of this class lies outside the space of valid fill values, which means it can + unambiguously express that the default fill value should be used. 
+ """ + + +DEFAULT_FILL_VALUE = DefaultFillValue() + + def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data @@ -296,7 +319,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[2], - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, @@ -320,7 +343,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3], - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -348,7 +371,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3] = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -376,7 +399,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -411,7 +434,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -552,7 +575,7 @@ async def _create( shape: ShapeLike, dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -673,7 +696,7 @@ def _create_metadata_v3( shape: ShapeLike, dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, - 
fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ChunkKeyEncodingLike | None = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, @@ -698,8 +721,9 @@ def _create_metadata_v3( else: chunk_key_encoding_parsed = chunk_key_encoding - if fill_value is None: - # v3 spec will not allow a null fill value + if isinstance(fill_value, DefaultFillValue) or fill_value is None: + # Use dtype's default scalar for DefaultFillValue sentinel + # For v3, None is converted to DefaultFillValue behavior fill_value_parsed = dtype.default_scalar() else: fill_value_parsed = fill_value @@ -725,7 +749,7 @@ async def _create_v3( dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, config: ArrayConfig, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ( ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] @@ -774,22 +798,28 @@ def _create_metadata_v2( chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, - fill_value: float | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: dimension_separator = "." 
- if fill_value is None: - fill_value = dtype.default_scalar() # type: ignore[assignment] + + # Handle DefaultFillValue sentinel + if isinstance(fill_value, DefaultFillValue): + fill_value_parsed: Any = dtype.default_scalar() + else: + # For v2, preserve None as-is (backward compatibility) + fill_value_parsed = fill_value + return ArrayV2Metadata( shape=shape, dtype=dtype, chunks=chunks, order=order, dimension_separator=dimension_separator, - fill_value=fill_value, + fill_value=fill_value_parsed, compressor=compressor, filters=filters, attributes=attributes, @@ -806,7 +836,7 @@ async def _create_v2( order: MemoryOrder, config: ArrayConfig, dimension_separator: Literal[".", "/"] | None = None, - fill_value: float | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, @@ -1750,7 +1780,7 @@ def create( shape: ChunkCoords, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ChunkCoords | None = None, @@ -1879,7 +1909,7 @@ def _create( shape: ChunkCoords, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ChunkCoords | None = None, @@ -3836,7 +3866,7 @@ async def from_array( filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = None, attributes: dict[str, JSON] | None = None, @@ -4098,7 +4128,7 @@ async def init_array( filters: FiltersLike = "auto", compressors: CompressorsLike = 
"auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, @@ -4319,7 +4349,7 @@ async def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, diff --git a/tests/test_array.py b/tests/test_array.py index bc27a30593..fe23bc1284 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: else: assert a.fill_value == dtype.default_scalar() + @staticmethod + # @pytest.mark.parametrize("zarr_format", [2, 3]) + @pytest.mark.parametrize("dtype", zdtype_examples) + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") + def test_default_fill_value_None( + dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat + ) -> None: + """ + Test that the fill value of an array is set to the default value for an explicit None argument for + Zarr Format 3, and to null for Zarr Format 2 + """ + a = zarr.create_array( + store, shape=(5,), chunks=(5,), dtype=dtype, fill_value=None, zarr_format=zarr_format + ) + if zarr_format == 3: + if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): + assert np.isnat(dtype.default_scalar()) + else: + assert a.fill_value == dtype.default_scalar() + elif zarr_format == 2: + assert a.fill_value is None + @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples)