From 544e8831bba412273edc9052e6ec8b7077a1cc67 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 2 Jul 2025 12:40:14 -0400 Subject: [PATCH 1/9] fix: restore fill_value=None for zarr_format=2 --- src/zarr/core/array.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index cd6b33a28c..c7322362a1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -774,15 +774,13 @@ def _create_metadata_v2( chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, - fill_value: float | None = None, + fill_value: Any | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: dimension_separator = "." - if fill_value is None: - fill_value = dtype.default_scalar() # type: ignore[assignment] return ArrayV2Metadata( shape=shape, dtype=dtype, @@ -806,7 +804,7 @@ async def _create_v2( order: MemoryOrder, config: ArrayConfig, dimension_separator: Literal[".", "/"] | None = None, - fill_value: float | None = None, + fill_value: Any | None = None, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, From 732820501627df3ee36bf98dd6371976bf61e47d Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 2 Jul 2025 15:41:59 -0400 Subject: [PATCH 2/9] Fix: Restore Zarr Format 2 default fill value behavior Introduce a new Singleton sentinel class to indicate that the default scalar for the dtype should be used as the fill_value. This allows for preserving the zarr_format 2 behavior of None -> null. For zarr format 3 either the DefaultFillValue or None imply using the dtype default scalar value.
--- src/zarr/api/synchronous.py | 8 ++-- src/zarr/core/array.py | 79 ++++++++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py index f2dc8757d6..b60f69a673 100644 --- a/src/zarr/api/synchronous.py +++ b/src/zarr/api/synchronous.py @@ -7,7 +7,7 @@ import zarr.api.asynchronous as async_api import zarr.core.array from zarr._compat import _deprecate_positional_args -from zarr.core.array import Array, AsyncArray, CompressorLike +from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike from zarr.core.group import Group from zarr.core.sync import sync from zarr.core.sync_group import create_hierarchy @@ -606,7 +606,7 @@ def create( chunks: ChunkCoords | int | bool | None = None, dtype: ZDTypeLike | None = None, compressor: CompressorLike = "auto", - fill_value: Any | None = None, # TODO: need type + fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, @@ -763,7 +763,7 @@ def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, @@ -929,7 +929,7 @@ def from_array( filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = None, attributes: dict[str, JSON] | None = None, diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index c7322362a1..0abe3a2874 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -138,11 
+138,43 @@ # Array and AsyncArray are defined in the base ``zarr`` namespace -__all__ = ["create_codec_pipeline", "parse_array_metadata"] +__all__ = [ + "DEFAULT_FILL_VALUE", + "DefaultFillValue", + "create_codec_pipeline", + "parse_array_metadata", +] logger = getLogger(__name__) +class DefaultFillValue: + """ + Sentinel class to indicate that the default fill value should be used. + + This class is used to distinguish between: + Zarr Format 3: + - fill_value = None: dtype default value + Zarr Format 2: + - fill_value = None: fill_value saved as null + + This allows backwards compatibility with zarr format 2. + + This is implemented as a singleton. + """ + + _instance = None + + def __new__(cls) -> Self: + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + +# Create the singleton instance +DEFAULT_FILL_VALUE = DefaultFillValue() + + def parse_array_metadata(data: Any) -> ArrayMetadata: if isinstance(data, ArrayMetadata): return data @@ -296,7 +328,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[2], - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, @@ -320,7 +352,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3], - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -348,7 +380,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: Literal[3] = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -376,7 +408,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat, - fill_value: Any | None = None, + fill_value: Any | 
None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -411,7 +443,7 @@ async def create( shape: ShapeLike, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -552,7 +584,7 @@ async def _create( shape: ShapeLike, dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar], zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ShapeLike | None = None, @@ -673,7 +705,7 @@ def _create_metadata_v3( shape: ShapeLike, dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ChunkKeyEncodingLike | None = None, codecs: Iterable[Codec | dict[str, JSON]] | None = None, dimension_names: DimensionNames = None, @@ -698,8 +730,9 @@ def _create_metadata_v3( else: chunk_key_encoding_parsed = chunk_key_encoding - if fill_value is None: - # v3 spec will not allow a null fill value + if isinstance(fill_value, DefaultFillValue) or fill_value is None: + # Use dtype's default scalar for DefaultFillValue sentinel + # For v3, None is converted to DefaultFillValue behavior fill_value_parsed = dtype.default_scalar() else: fill_value_parsed = fill_value @@ -725,7 +758,7 @@ async def _create_v3( dtype: ZDType[TBaseDType, TBaseScalar], chunk_shape: ChunkCoords, config: ArrayConfig, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, chunk_key_encoding: ( ChunkKeyEncodingLike | tuple[Literal["default"], Literal[".", "/"]] @@ -774,20 +807,28 @@ def _create_metadata_v2( chunks: ChunkCoords, order: MemoryOrder, dimension_separator: Literal[".", "/"] | None = None, - fill_value: Any | None = None, + fill_value: Any | 
None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLikev2 = None, attributes: dict[str, JSON] | None = None, ) -> ArrayV2Metadata: if dimension_separator is None: dimension_separator = "." + + # Handle DefaultFillValue sentinel + if isinstance(fill_value, DefaultFillValue): + fill_value_parsed: Any = dtype.default_scalar() + else: + # For v2, preserve None as-is (backward compatibility) + fill_value_parsed = fill_value + return ArrayV2Metadata( shape=shape, dtype=dtype, chunks=chunks, order=order, dimension_separator=dimension_separator, - fill_value=fill_value, + fill_value=fill_value_parsed, compressor=compressor, filters=filters, attributes=attributes, @@ -804,7 +845,7 @@ async def _create_v2( order: MemoryOrder, config: ArrayConfig, dimension_separator: Literal[".", "/"] | None = None, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None, compressor: CompressorLike = "auto", attributes: dict[str, JSON] | None = None, @@ -1748,7 +1789,7 @@ def create( shape: ChunkCoords, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ChunkCoords | None = None, @@ -1877,7 +1918,7 @@ def _create( shape: ChunkCoords, dtype: ZDTypeLike, zarr_format: ZarrFormat = 3, - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, attributes: dict[str, JSON] | None = None, # v3 only chunk_shape: ChunkCoords | None = None, @@ -3834,7 +3875,7 @@ async def from_array( filters: FiltersLike | Literal["keep"] = "keep", compressors: CompressorsLike | Literal["keep"] = "keep", serializer: SerializerLike | Literal["keep"] = "keep", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | 
None = None, attributes: dict[str, JSON] | None = None, @@ -4096,7 +4137,7 @@ async def init_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, @@ -4317,7 +4358,7 @@ async def create_array( filters: FiltersLike = "auto", compressors: CompressorsLike = "auto", serializer: SerializerLike = "auto", - fill_value: Any | None = None, + fill_value: Any | None = DEFAULT_FILL_VALUE, order: MemoryOrder | None = None, zarr_format: ZarrFormat | None = 3, attributes: dict[str, JSON] | None = None, From 61a1d0184a46400f8165444e9b4753e4803292c1 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 2 Jul 2025 17:08:52 -0400 Subject: [PATCH 3/9] simplify default fill value class --- src/zarr/core/array.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0abe3a2874..3a8f8896fd 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -159,19 +159,9 @@ class DefaultFillValue: - fill_value = None: fill_value saved as null This allows backwards compatibility with zarr format 2. - - This is implemented as a singleton. 
""" - _instance = None - - def __new__(cls) -> Self: - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - -# Create the singleton instance DEFAULT_FILL_VALUE = DefaultFillValue() From 96e013b6d89e607ca48747f1fbb817237229987c Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 2 Jul 2025 17:39:12 -0400 Subject: [PATCH 4/9] test: add test of None backwards compat --- tests/test_array.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_array.py b/tests/test_array.py index bc27a30593..b5878f1068 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None: else: assert a.fill_value == dtype.default_scalar() + @staticmethod + # @pytest.mark.parametrize("zarr_format", [2, 3]) + @pytest.mark.parametrize("dtype", zdtype_examples) + @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") + def test_default_fill_value_None( + dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat + ) -> None: + """ + Test that the fill value of an array is set to the default value for an explicit None arguement for + Zarr Format 3, and to null for Zarr Format 2 + """ + a = zarr.create_array( + store, shape=(5,), chunks=(5,), dtype=dtype, fill_value=None, zarr_format=zarr_format + ) + if zarr_format == 3: + if isinstance(dtype, DateTime64 | TimeDelta64) and np.isnat(a.fill_value): + assert np.isnat(dtype.default_scalar()) + else: + assert a.fill_value == dtype.default_scalar() + elif zarr_format == 2: + assert a.fill_value is None + @staticmethod @pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning") @pytest.mark.parametrize("dtype", zdtype_examples) From 15f9e3f84fe6d5836fc883afca33c59fa1eed1f1 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 3 Jul 2025 10:11:39 +0200 Subject: [PATCH 5/9] Update tests/test_array.py --- 
tests/test_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_array.py b/tests/test_array.py index b5878f1068..fe23bc1284 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1020,7 +1020,7 @@ def test_default_fill_value_None( dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat ) -> None: """ - Test that the fill value of an array is set to the default value for an explicit None arguement for + Test that the fill value of an array is set to the default value for an explicit None argument for Zarr Format 3, and to null for Zarr Format 2 """ a = zarr.create_array( From 879d36b741c4ee163320c60e5f266f4f5b5adb27 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 3 Jul 2025 10:35:16 +0200 Subject: [PATCH 6/9] Update src/zarr/core/array.py --- src/zarr/core/array.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 3a8f8896fd..6349ae41e7 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -152,13 +152,12 @@ class DefaultFillValue: """ Sentinel class to indicate that the default fill value should be used. - This class is used to distinguish between: - Zarr Format 3: - - fill_value = None: dtype default value - Zarr Format 2: - - fill_value = None: fill_value saved as null - - This allows backwards compatibility with zarr format 2. + This class exists because the normal values used to convey "defaultness" + like ``None`` or ``"auto"` do not work for the fill value parameter of a Zarr array. + ``None`` is a valid fill value for Zarr V2 (resulting in ``"fill_value: null"`` in array metadata). + A string like ``"auto"`` is a valid fill value for an array with a string data type. + But an instance of this class lies outside the space of valid fill values, which makes it + suitable for expressing that the default fill value should be used. 
""" From 7e1d988c9bc44ece4df6a91fc59fa12669bc32b5 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 3 Jul 2025 10:36:09 +0200 Subject: [PATCH 7/9] Update src/zarr/core/array.py --- src/zarr/core/array.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6349ae41e7..7a6fdbdd36 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -152,12 +152,12 @@ class DefaultFillValue: """ Sentinel class to indicate that the default fill value should be used. - This class exists because the normal values used to convey "defaultness" - like ``None`` or ``"auto"` do not work for the fill value parameter of a Zarr array. - ``None`` is a valid fill value for Zarr V2 (resulting in ``"fill_value: null"`` in array metadata). - A string like ``"auto"`` is a valid fill value for an array with a string data type. - But an instance of this class lies outside the space of valid fill values, which makes it - suitable for expressing that the default fill value should be used. + This class exists because the normal values used to convey "defaultness" + like ``None`` or ``"auto"` do not work for the fill value parameter of a Zarr array. + ``None`` is a valid fill value for Zarr V2 (resulting in ``"fill_value: null"`` in array metadata). + A string like ``"auto"`` is a valid fill value for an array with a string data type. + But an instance of this class lies outside the space of valid fill values, which makes it + suitable for expressing that the default fill value should be used. 
""" From 2b605ad6286706fba98aeb567a94abc696261771 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 3 Jul 2025 10:51:30 +0200 Subject: [PATCH 8/9] changelog --- changes/3198.bugfix.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/3198.bugfix.rst diff --git a/changes/3198.bugfix.rst b/changes/3198.bugfix.rst new file mode 100644 index 0000000000..840996641c --- /dev/null +++ b/changes/3198.bugfix.rst @@ -0,0 +1,4 @@ +Restores the ability to create a Zarr V2 array with a ``null`` fill value by introducing a new +class ``DefaultFillValue``, and setting the default value of the ``fill_value`` parameter in array +creation routines to an instance of ``DefaultFillValue``. For Zarr V3 arrays, ``None`` will act as an +alias for a ``DefaultFillValue`` instance, thus preserving compatibility with existing code. \ No newline at end of file From e767be5824de01b131f9954e58565a24c065d54b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 3 Jul 2025 10:57:56 +0200 Subject: [PATCH 9/9] docstring --- src/zarr/core/array.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 7a6fdbdd36..a44a4b55d1 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -152,12 +152,14 @@ class DefaultFillValue: """ Sentinel class to indicate that the default fill value should be used. - This class exists because the normal values used to convey "defaultness" - like ``None`` or ``"auto"` do not work for the fill value parameter of a Zarr array. - ``None`` is a valid fill value for Zarr V2 (resulting in ``"fill_value: null"`` in array metadata). - A string like ``"auto"`` is a valid fill value for an array with a string data type. - But an instance of this class lies outside the space of valid fill values, which makes it - suitable for expressing that the default fill value should be used. 
+ This class exists because conventional values used to convey "defaultness" like ``None`` or + ``"auto"`` are ambiguous when specifying the fill value parameter of a Zarr array. + The value ``None`` is ambiguous because it is a valid fill value for Zarr V2 + (resulting in ``"fill_value": null`` in array metadata). + A string like ``"auto"`` is ambiguous because such a string is a valid fill value for an array + with a string data type. + An instance of this class lies outside the space of valid fill values, which means it can + unambiguously express that the default fill value should be used. """