Skip to content

Commit 7328205

Browse files
committed
Fix: Restore Zarr Format 2 default fill value behavior
Introduce a new singleton sentinel class, DefaultFillValue, to indicate that the default scalar for the dtype should be used as the fill_value. This preserves the Zarr format 2 behavior of None -> null. For Zarr format 3, either DefaultFillValue or None implies using the dtype's default scalar value.
1 parent 544e883 commit 7328205

File tree

2 files changed

+64
-23
lines changed

2 files changed

+64
-23
lines changed

src/zarr/api/synchronous.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray, CompressorLike
10+
from zarr.core.array import DEFAULT_FILL_VALUE, Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -606,7 +606,7 @@ def create(
606606
chunks: ChunkCoords | int | bool | None = None,
607607
dtype: ZDTypeLike | None = None,
608608
compressor: CompressorLike = "auto",
609-
fill_value: Any | None = None, # TODO: need type
609+
fill_value: Any | None = DEFAULT_FILL_VALUE, # TODO: need type
610610
order: MemoryOrder | None = None,
611611
store: str | StoreLike | None = None,
612612
synchronizer: Any | None = None,
@@ -763,7 +763,7 @@ def create_array(
763763
filters: FiltersLike = "auto",
764764
compressors: CompressorsLike = "auto",
765765
serializer: SerializerLike = "auto",
766-
fill_value: Any | None = None,
766+
fill_value: Any | None = DEFAULT_FILL_VALUE,
767767
order: MemoryOrder | None = None,
768768
zarr_format: ZarrFormat | None = 3,
769769
attributes: dict[str, JSON] | None = None,
@@ -929,7 +929,7 @@ def from_array(
929929
filters: FiltersLike | Literal["keep"] = "keep",
930930
compressors: CompressorsLike | Literal["keep"] = "keep",
931931
serializer: SerializerLike | Literal["keep"] = "keep",
932-
fill_value: Any | None = None,
932+
fill_value: Any | None = DEFAULT_FILL_VALUE,
933933
order: MemoryOrder | None = None,
934934
zarr_format: ZarrFormat | None = None,
935935
attributes: dict[str, JSON] | None = None,

src/zarr/core/array.py

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,43 @@
138138

139139

140140
# Array and AsyncArray are defined in the base ``zarr`` namespace
141-
__all__ = ["create_codec_pipeline", "parse_array_metadata"]
141+
__all__ = [
142+
"DEFAULT_FILL_VALUE",
143+
"DefaultFillValue",
144+
"create_codec_pipeline",
145+
"parse_array_metadata",
146+
]
142147

143148
logger = getLogger(__name__)
144149

145150

151+
class DefaultFillValue:
152+
"""
153+
Sentinel class to indicate that the default fill value should be used.
154+
155+
This class is used to distinguish between:
156+
Zarr Format 3:
157+
- fill_value = None: dtype default value
158+
Zarr Format 2:
159+
- fill_value = None: fill_value saved as null
160+
161+
This allows backwards compatibility with zarr format 2.
162+
163+
This is implemented as a singleton.
164+
"""
165+
166+
_instance = None
167+
168+
def __new__(cls) -> Self:
169+
if cls._instance is None:
170+
cls._instance = super().__new__(cls)
171+
return cls._instance
172+
173+
174+
# Create the singleton instance
175+
DEFAULT_FILL_VALUE = DefaultFillValue()
176+
177+
146178
def parse_array_metadata(data: Any) -> ArrayMetadata:
147179
if isinstance(data, ArrayMetadata):
148180
return data
@@ -296,7 +328,7 @@ async def create(
296328
shape: ShapeLike,
297329
dtype: ZDTypeLike,
298330
zarr_format: Literal[2],
299-
fill_value: Any | None = None,
331+
fill_value: Any | None = DEFAULT_FILL_VALUE,
300332
attributes: dict[str, JSON] | None = None,
301333
chunks: ShapeLike | None = None,
302334
dimension_separator: Literal[".", "/"] | None = None,
@@ -320,7 +352,7 @@ async def create(
320352
shape: ShapeLike,
321353
dtype: ZDTypeLike,
322354
zarr_format: Literal[3],
323-
fill_value: Any | None = None,
355+
fill_value: Any | None = DEFAULT_FILL_VALUE,
324356
attributes: dict[str, JSON] | None = None,
325357
# v3 only
326358
chunk_shape: ShapeLike | None = None,
@@ -348,7 +380,7 @@ async def create(
348380
shape: ShapeLike,
349381
dtype: ZDTypeLike,
350382
zarr_format: Literal[3] = 3,
351-
fill_value: Any | None = None,
383+
fill_value: Any | None = DEFAULT_FILL_VALUE,
352384
attributes: dict[str, JSON] | None = None,
353385
# v3 only
354386
chunk_shape: ShapeLike | None = None,
@@ -376,7 +408,7 @@ async def create(
376408
shape: ShapeLike,
377409
dtype: ZDTypeLike,
378410
zarr_format: ZarrFormat,
379-
fill_value: Any | None = None,
411+
fill_value: Any | None = DEFAULT_FILL_VALUE,
380412
attributes: dict[str, JSON] | None = None,
381413
# v3 only
382414
chunk_shape: ShapeLike | None = None,
@@ -411,7 +443,7 @@ async def create(
411443
shape: ShapeLike,
412444
dtype: ZDTypeLike,
413445
zarr_format: ZarrFormat = 3,
414-
fill_value: Any | None = None,
446+
fill_value: Any | None = DEFAULT_FILL_VALUE,
415447
attributes: dict[str, JSON] | None = None,
416448
# v3 only
417449
chunk_shape: ShapeLike | None = None,
@@ -552,7 +584,7 @@ async def _create(
552584
shape: ShapeLike,
553585
dtype: ZDTypeLike | ZDType[TBaseDType, TBaseScalar],
554586
zarr_format: ZarrFormat = 3,
555-
fill_value: Any | None = None,
587+
fill_value: Any | None = DEFAULT_FILL_VALUE,
556588
attributes: dict[str, JSON] | None = None,
557589
# v3 only
558590
chunk_shape: ShapeLike | None = None,
@@ -673,7 +705,7 @@ def _create_metadata_v3(
673705
shape: ShapeLike,
674706
dtype: ZDType[TBaseDType, TBaseScalar],
675707
chunk_shape: ChunkCoords,
676-
fill_value: Any | None = None,
708+
fill_value: Any | None = DEFAULT_FILL_VALUE,
677709
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
678710
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
679711
dimension_names: DimensionNames = None,
@@ -698,8 +730,9 @@ def _create_metadata_v3(
698730
else:
699731
chunk_key_encoding_parsed = chunk_key_encoding
700732

701-
if fill_value is None:
702-
# v3 spec will not allow a null fill value
733+
if isinstance(fill_value, DefaultFillValue) or fill_value is None:
734+
# Use dtype's default scalar for DefaultFillValue sentinel
735+
# For v3, None is converted to DefaultFillValue behavior
703736
fill_value_parsed = dtype.default_scalar()
704737
else:
705738
fill_value_parsed = fill_value
@@ -725,7 +758,7 @@ async def _create_v3(
725758
dtype: ZDType[TBaseDType, TBaseScalar],
726759
chunk_shape: ChunkCoords,
727760
config: ArrayConfig,
728-
fill_value: Any | None = None,
761+
fill_value: Any | None = DEFAULT_FILL_VALUE,
729762
chunk_key_encoding: (
730763
ChunkKeyEncodingLike
731764
| tuple[Literal["default"], Literal[".", "/"]]
@@ -774,20 +807,28 @@ def _create_metadata_v2(
774807
chunks: ChunkCoords,
775808
order: MemoryOrder,
776809
dimension_separator: Literal[".", "/"] | None = None,
777-
fill_value: Any | None = None,
810+
fill_value: Any | None = DEFAULT_FILL_VALUE,
778811
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
779812
compressor: CompressorLikev2 = None,
780813
attributes: dict[str, JSON] | None = None,
781814
) -> ArrayV2Metadata:
782815
if dimension_separator is None:
783816
dimension_separator = "."
817+
818+
# Handle DefaultFillValue sentinel
819+
if isinstance(fill_value, DefaultFillValue):
820+
fill_value_parsed: Any = dtype.default_scalar()
821+
else:
822+
# For v2, preserve None as-is (backward compatibility)
823+
fill_value_parsed = fill_value
824+
784825
return ArrayV2Metadata(
785826
shape=shape,
786827
dtype=dtype,
787828
chunks=chunks,
788829
order=order,
789830
dimension_separator=dimension_separator,
790-
fill_value=fill_value,
831+
fill_value=fill_value_parsed,
791832
compressor=compressor,
792833
filters=filters,
793834
attributes=attributes,
@@ -804,7 +845,7 @@ async def _create_v2(
804845
order: MemoryOrder,
805846
config: ArrayConfig,
806847
dimension_separator: Literal[".", "/"] | None = None,
807-
fill_value: Any | None = None,
848+
fill_value: Any | None = DEFAULT_FILL_VALUE,
808849
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
809850
compressor: CompressorLike = "auto",
810851
attributes: dict[str, JSON] | None = None,
@@ -1748,7 +1789,7 @@ def create(
17481789
shape: ChunkCoords,
17491790
dtype: ZDTypeLike,
17501791
zarr_format: ZarrFormat = 3,
1751-
fill_value: Any | None = None,
1792+
fill_value: Any | None = DEFAULT_FILL_VALUE,
17521793
attributes: dict[str, JSON] | None = None,
17531794
# v3 only
17541795
chunk_shape: ChunkCoords | None = None,
@@ -1877,7 +1918,7 @@ def _create(
18771918
shape: ChunkCoords,
18781919
dtype: ZDTypeLike,
18791920
zarr_format: ZarrFormat = 3,
1880-
fill_value: Any | None = None,
1921+
fill_value: Any | None = DEFAULT_FILL_VALUE,
18811922
attributes: dict[str, JSON] | None = None,
18821923
# v3 only
18831924
chunk_shape: ChunkCoords | None = None,
@@ -3834,7 +3875,7 @@ async def from_array(
38343875
filters: FiltersLike | Literal["keep"] = "keep",
38353876
compressors: CompressorsLike | Literal["keep"] = "keep",
38363877
serializer: SerializerLike | Literal["keep"] = "keep",
3837-
fill_value: Any | None = None,
3878+
fill_value: Any | None = DEFAULT_FILL_VALUE,
38383879
order: MemoryOrder | None = None,
38393880
zarr_format: ZarrFormat | None = None,
38403881
attributes: dict[str, JSON] | None = None,
@@ -4096,7 +4137,7 @@ async def init_array(
40964137
filters: FiltersLike = "auto",
40974138
compressors: CompressorsLike = "auto",
40984139
serializer: SerializerLike = "auto",
4099-
fill_value: Any | None = None,
4140+
fill_value: Any | None = DEFAULT_FILL_VALUE,
41004141
order: MemoryOrder | None = None,
41014142
zarr_format: ZarrFormat | None = 3,
41024143
attributes: dict[str, JSON] | None = None,
@@ -4317,7 +4358,7 @@ async def create_array(
43174358
filters: FiltersLike = "auto",
43184359
compressors: CompressorsLike = "auto",
43194360
serializer: SerializerLike = "auto",
4320-
fill_value: Any | None = None,
4361+
fill_value: Any | None = DEFAULT_FILL_VALUE,
43214362
order: MemoryOrder | None = None,
43224363
zarr_format: ZarrFormat | None = 3,
43234364
attributes: dict[str, JSON] | None = None,

0 commit comments

Comments
 (0)