Skip to content
Merged
4 changes: 4 additions & 0 deletions changes/3198.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Restores the ability to create a Zarr V2 array with a ``null`` fill value by introducing a new
class ``DefaultFillValue``, and setting the default value of the ``fill_value`` parameter in array
creation routines to an instance of ``DefaultFillValue``. For Zarr V3 arrays, ``None`` will act as an
alias for a ``DefaultFillValue`` instance, thus preserving compatibility with existing code.
8 changes: 4 additions & 4 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zarr.api.asynchronous as async_api
import zarr.core.array
from zarr._compat import _deprecate_positional_args
from zarr.core.array import Array, AsyncArray, CompressorLike
from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike
from zarr.core.group import Group
from zarr.core.sync import sync
from zarr.core.sync_group import create_hierarchy
Expand Down Expand Up @@ -606,7 +606,7 @@ def create(
chunks: ChunkCoords | int | bool | None = None,
dtype: ZDTypeLike | None = None,
compressor: CompressorLike = "auto",
fill_value: Any | None = None, # TODO: need type
fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for posterity, we now have a type alias for the scalar types of all the dtypes which we can use instead of Any here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

although maybe we should just use object, since we do want to allow inputs to be castable

order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
synchronizer: Any | None = None,
Expand Down Expand Up @@ -763,7 +763,7 @@ def create_array(
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -929,7 +929,7 @@ def from_array(
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down
72 changes: 51 additions & 21 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,34 @@


# Array and AsyncArray are defined in the base ``zarr`` namespace
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
__all__ = [
"DEFAULT_FILL_VALUE",
"DefaultFillValue",
"create_codec_pipeline",
"parse_array_metadata",
]

logger = getLogger(__name__)


class DefaultFillValue:
    """
    Sentinel class to indicate that the default fill value should be used.

    This class exists because conventional values used to convey "defaultness" like ``None`` or
    ``"auto"`` are ambiguous when specifying the fill value parameter of a Zarr array.
    The value ``None`` is ambiguous because it is a valid fill value for Zarr V2
    (resulting in ``"fill_value": null`` in array metadata).
    A string like ``"auto"`` is ambiguous because such a string is a valid fill value for an array
    with a string data type.
    An instance of this class lies outside the space of valid fill values, which means it can
    unambiguously express that the default fill value should be used.
    """

    def __repr__(self) -> str:
        # A stable repr keeps rendered signatures, ``help()`` output and error messages
        # free of per-process memory addresses.
        return f"{type(self).__name__}()"


# Shared sentinel instance used as the default for ``fill_value`` parameters.
DEFAULT_FILL_VALUE = DefaultFillValue()


def parse_array_metadata(data: Any) -> ArrayMetadata:
if isinstance(data, ArrayMetadata):
return data
Expand Down Expand Up @@ -296,7 +319,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[2],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
chunks: ShapeLike | None = None,
dimension_separator: Literal[".", "/"] | None = None,
Expand All @@ -320,7 +343,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -348,7 +371,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3] = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -376,7 +399,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -411,7 +434,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -552,7 +575,7 @@
shape: ShapeLike,
dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar],
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -673,7 +696,7 @@
shape: ShapeLike,
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: DimensionNames = None,
Expand All @@ -698,8 +721,9 @@
else:
chunk_key_encoding_parsed = chunk_key_encoding

if fill_value is None:
# v3 spec will not allow a null fill value
if isinstance(fill_value, DefaultFillValue) or fill_value is None:
# Use dtype's default scalar for DefaultFillValue sentinel
# For v3, None is converted to DefaultFillValue behavior
fill_value_parsed = dtype.default_scalar()
else:
fill_value_parsed = fill_value
Expand All @@ -725,7 +749,7 @@
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
config: ArrayConfig,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: (
ChunkKeyEncodingLike
| tuple[Literal["default"], Literal[".", "/"]]
Expand Down Expand Up @@ -774,22 +798,28 @@
chunks: ChunkCoords,
order: MemoryOrder,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLikev2 = None,
attributes: dict[str, JSON] | None = None,
) -> ArrayV2Metadata:
if dimension_separator is None:
dimension_separator = "."
if fill_value is None:
fill_value = dtype.default_scalar() # type: ignore[assignment]

# Handle DefaultFillValue sentinel
if isinstance(fill_value, DefaultFillValue):
fill_value_parsed: Any = dtype.default_scalar()

Check warning on line 811 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L811

Added line #L811 was not covered by tests
else:
# For v2, preserve None as-is (backward compatibility)
fill_value_parsed = fill_value

return ArrayV2Metadata(
shape=shape,
dtype=dtype,
chunks=chunks,
order=order,
dimension_separator=dimension_separator,
fill_value=fill_value,
fill_value=fill_value_parsed,
compressor=compressor,
filters=filters,
attributes=attributes,
Expand All @@ -806,7 +836,7 @@
order: MemoryOrder,
config: ArrayConfig,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLike = "auto",
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -1750,7 +1780,7 @@
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -1879,7 +1909,7 @@
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -3836,7 +3866,7 @@
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4098,7 +4128,7 @@
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4319,7 +4349,7 @@
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down
22 changes: 22 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None:
else:
assert a.fill_value == dtype.default_scalar()

@staticmethod
# NOTE(review): ``zarr_format`` is not parametrized here; presumably a fixture defined
# elsewhere supplies it -- confirm.
@pytest.mark.parametrize("dtype", zdtype_examples)
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
def test_default_fill_value_None(
    dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat
) -> None:
    """
    Check how an explicit ``fill_value=None`` is interpreted by ``zarr.create_array``.

    For Zarr format 3 the resulting fill value must match the data type's default scalar;
    for Zarr format 2 the fill value must remain ``None``.
    """
    arr = zarr.create_array(
        store,
        shape=(5,),
        chunks=(5,),
        dtype=dtype,
        fill_value=None,
        zarr_format=zarr_format,
    )
    if zarr_format == 2:
        assert arr.fill_value is None
    elif zarr_format == 3:
        is_time_dtype = isinstance(dtype, DateTime64 | TimeDelta64)
        if is_time_dtype and np.isnat(arr.fill_value):
            # NaT does not compare equal to itself in NumPy, so check NaT-ness instead
            # of using ``==``.
            assert np.isnat(dtype.default_scalar())
        else:
            assert arr.fill_value == dtype.default_scalar()

@staticmethod
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@pytest.mark.parametrize("dtype", zdtype_examples)
Expand Down