Skip to content
8 changes: 4 additions & 4 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import zarr.api.asynchronous as async_api
import zarr.core.array
from zarr._compat import _deprecate_positional_args
from zarr.core.array import Array, AsyncArray, CompressorLike
from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike
from zarr.core.group import Group
from zarr.core.sync import sync
from zarr.core.sync_group import create_hierarchy
Expand Down Expand Up @@ -606,7 +606,7 @@ def create(
chunks: ChunkCoords | int | bool | None = None,
dtype: ZDTypeLike | None = None,
compressor: CompressorLike = "auto",
fill_value: Any | None = None, # TODO: need type
fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for posterity, we now have a type alias for the scalar types of all the dtypes which we can use instead of Any here

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

although maybe we should just use object, since we do want to allow inputs to be castable

order: MemoryOrder | None = None,
store: str | StoreLike | None = None,
synchronizer: Any | None = None,
Expand Down Expand Up @@ -763,7 +763,7 @@ def create_array(
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -929,7 +929,7 @@ def from_array(
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down
71 changes: 50 additions & 21 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,33 @@


# Array and AsyncArray are defined in the base ``zarr`` namespace
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
__all__ = [
"DEFAULT_FILL_VALUE",
"DefaultFillValue",
"create_codec_pipeline",
"parse_array_metadata",
]

logger = getLogger(__name__)


class DefaultFillValue:
    """
    Sentinel type meaning "no fill value was supplied; use the dtype default".

    A dedicated sentinel is needed because ``None`` is itself a meaningful
    argument whose interpretation depends on the Zarr format:

    Zarr Format 3:
        - ``fill_value = None``: the dtype's default value is used
    Zarr Format 2:
        - ``fill_value = None``: the fill value is saved as null

    Leaving ``fill_value`` at this sentinel selects the dtype's default
    scalar, while an explicit ``None`` keeps its Zarr format 2 meaning.
    This allows backwards compatibility with Zarr format 2.
    """

    def __repr__(self) -> str:
        # A fixed repr keeps function signatures, ``help()`` output and
        # generated documentation free of per-process memory addresses.
        return "<DefaultFillValue>"


# Shared instance used as the default for ``fill_value`` parameters.
DEFAULT_FILL_VALUE = DefaultFillValue()


def parse_array_metadata(data: Any) -> ArrayMetadata:
if isinstance(data, ArrayMetadata):
return data
Expand Down Expand Up @@ -296,7 +318,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[2],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
chunks: ShapeLike | None = None,
dimension_separator: Literal[".", "/"] | None = None,
Expand All @@ -320,7 +342,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3],
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -348,7 +370,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: Literal[3] = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -376,7 +398,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -411,7 +433,7 @@
shape: ShapeLike,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -552,7 +574,7 @@
shape: ShapeLike,
dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar],
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ShapeLike | None = None,
Expand Down Expand Up @@ -673,7 +695,7 @@
shape: ShapeLike,
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: DimensionNames = None,
Expand All @@ -698,8 +720,9 @@
else:
chunk_key_encoding_parsed = chunk_key_encoding

if fill_value is None:
# v3 spec will not allow a null fill value
if isinstance(fill_value, DefaultFillValue) or fill_value is None:
# Use dtype's default scalar for DefaultFillValue sentinel
# For v3, None is converted to DefaultFillValue behavior
fill_value_parsed = dtype.default_scalar()
else:
fill_value_parsed = fill_value
Expand All @@ -725,7 +748,7 @@
dtype: ZDType[TBaseDType, TBaseScalar],
chunk_shape: ChunkCoords,
config: ArrayConfig,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
chunk_key_encoding: (
ChunkKeyEncodingLike
| tuple[Literal["default"], Literal[".", "/"]]
Expand Down Expand Up @@ -774,22 +797,28 @@
chunks: ChunkCoords,
order: MemoryOrder,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLikev2 = None,
attributes: dict[str, JSON] | None = None,
) -> ArrayV2Metadata:
if dimension_separator is None:
dimension_separator = "."
if fill_value is None:
fill_value = dtype.default_scalar() # type: ignore[assignment]

# Handle DefaultFillValue sentinel
if isinstance(fill_value, DefaultFillValue):
fill_value_parsed: Any = dtype.default_scalar()

Check warning on line 810 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L810

Added line #L810 was not covered by tests
else:
# For v2, preserve None as-is (backward compatibility)
fill_value_parsed = fill_value

return ArrayV2Metadata(
shape=shape,
dtype=dtype,
chunks=chunks,
order=order,
dimension_separator=dimension_separator,
fill_value=fill_value,
fill_value=fill_value_parsed,
compressor=compressor,
filters=filters,
attributes=attributes,
Expand All @@ -806,7 +835,7 @@
order: MemoryOrder,
config: ArrayConfig,
dimension_separator: Literal[".", "/"] | None = None,
fill_value: float | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
compressor: CompressorLike = "auto",
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -1750,7 +1779,7 @@
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -1879,7 +1908,7 @@
shape: ChunkCoords,
dtype: ZDTypeLike,
zarr_format: ZarrFormat = 3,
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
attributes: dict[str, JSON] | None = None,
# v3 only
chunk_shape: ChunkCoords | None = None,
Expand Down Expand Up @@ -3836,7 +3865,7 @@
filters: FiltersLike | Literal["keep"] = "keep",
compressors: CompressorsLike | Literal["keep"] = "keep",
serializer: SerializerLike | Literal["keep"] = "keep",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = None,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4098,7 +4127,7 @@
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down Expand Up @@ -4319,7 +4348,7 @@
filters: FiltersLike = "auto",
compressors: CompressorsLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = None,
fill_value: Any | None = DEFAULT_FILL_VALUE,
order: MemoryOrder | None = None,
zarr_format: ZarrFormat | None = 3,
attributes: dict[str, JSON] | None = None,
Expand Down
22 changes: 22 additions & 0 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,28 @@ def test_default_fill_value(dtype: ZDType[Any, Any], store: Store) -> None:
else:
assert a.fill_value == dtype.default_scalar()

@staticmethod
@pytest.mark.parametrize("dtype", zdtype_examples)
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
def test_default_fill_value_None(
    dtype: ZDType[Any, Any], store: Store, zarr_format: ZarrFormat
) -> None:
    """
    Check how an explicit ``fill_value=None`` is interpreted per Zarr format.

    For Zarr format 3 the array's fill value must be the dtype's default
    scalar; for Zarr format 2 it must stay ``None`` (stored as null).
    """
    # NOTE(review): ``zarr_format`` is not parametrized on this test, so it is
    # presumably supplied by a fixture -- confirm.
    arr = zarr.create_array(
        store,
        shape=(5,),
        chunks=(5,),
        dtype=dtype,
        fill_value=None,
        zarr_format=zarr_format,
    )

    if zarr_format == 2:
        assert arr.fill_value is None
    elif zarr_format == 3:
        observed = arr.fill_value
        is_time_dtype = isinstance(dtype, DateTime64 | TimeDelta64)
        if is_time_dtype and np.isnat(observed):
            # NaT does not compare equal to itself, so compare NaT-ness
            # instead of using ``==``.
            assert np.isnat(dtype.default_scalar())
        else:
            assert observed == dtype.default_scalar()

@staticmethod
@pytest.mark.filterwarnings("ignore::zarr.core.dtype.common.UnstableSpecificationWarning")
@pytest.mark.parametrize("dtype", zdtype_examples)
Expand Down