Skip to content

Commit 2e7a739

Browse files
committed
Parse 0 fill value as "" for str dtype
1 parent 87557e3 commit 2e7a739

File tree

4 files changed

+27
-5
lines changed

4 files changed

+27
-5
lines changed

src/zarr/core/array.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
_warn_order_kwarg,
5959
concurrent_map,
6060
parse_dtype,
61+
parse_fill_value,
6162
parse_order,
6263
parse_shapelike,
6364
product,
@@ -3901,6 +3902,7 @@ async def init_array(
39013902

39023903
from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation
39033904

3905+
fill_value_parsed = parse_fill_value(fill_value, dtype, zarr_format)
39043906
dtype_parsed = parse_dtype(dtype, zarr_format=zarr_format)
39053907
shape_parsed = parse_shapelike(shape)
39063908
chunk_key_encoding_parsed = _parse_chunk_key_encoding(
@@ -3947,7 +3949,7 @@ async def init_array(
39473949
dtype=dtype_parsed,
39483950
chunks=chunk_shape_parsed,
39493951
dimension_separator=chunk_key_encoding_parsed.separator,
3950-
fill_value=fill_value,
3952+
fill_value=fill_value_parsed,
39513953
order=order_parsed,
39523954
filters=filters_parsed,
39533955
compressor=compressor_parsed,
@@ -3985,7 +3987,7 @@ async def init_array(
39853987
meta = AsyncArray._create_metadata_v3(
39863988
shape=shape_parsed,
39873989
dtype=dtype_parsed,
3988-
fill_value=fill_value,
3990+
fill_value=fill_value_parsed,
39893991
chunk_shape=chunks_out,
39903992
chunk_key_encoding=chunk_key_encoding_parsed,
39913993
codecs=codecs_out,

src/zarr/core/array_spec.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,9 @@ def __init__(
102102
) -> None:
103103
shape_parsed = parse_shapelike(shape)
104104
dtype_parsed = np.dtype(dtype)
105-
fill_value_parsed = parse_fill_value(fill_value)
105+
# No zarr_format available here...
106+
# fill_value_parsed = parse_fill_value(fill_value, dtype, zarr_format=2)
107+
fill_value_parsed = fill_value
106108

107109
object.__setattr__(self, "shape", shape_parsed)
108110
object.__setattr__(self, "dtype", dtype_parsed)

src/zarr/core/common.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,12 @@ def parse_shapelike(data: int | Iterable[int]) -> tuple[int, ...]:
150150
return data_tuple
151151

152152

153-
def parse_fill_value(fill_value: Any, dtype: Any, zarr_format: ZarrFormat) -> Any:
    """Normalize a user-supplied fill value for the given dtype and Zarr format.

    For Zarr format 2, a fill value equal to ``0`` combined with a string dtype
    (given either as the builtin ``str`` or as the literal ``"str"``) is
    replaced by the empty string. Every other input is returned unchanged.

    Parameters
    ----------
    fill_value
        The fill value requested by the caller.
    dtype
        The dtype requested by the caller; only ``str`` and ``"str"`` are
        treated specially here.
    zarr_format
        The Zarr format version of the array being created.

    Returns
    -------
    Any
        ``""`` for the format-2 string-dtype/zero case, otherwise
        ``fill_value`` as passed in.
    """
    # The dtype test is parenthesised on purpose: ``and`` binds tighter than
    # ``or``, so without the parentheses ``dtype is str`` alone would replace
    # *every* fill value (including real strings) with "".
    is_str_dtype = dtype is str or dtype == "str"
    if zarr_format == 2 and is_str_dtype and fill_value == 0:
        return ""
    # todo: real validation
    return fill_value
156159

157160

158161
def parse_order(data: Any) -> Literal["C", "F"]:

tests/test_metadata/test_v2.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,3 +298,18 @@ def test_zstd_checksum() -> None:
298298
arr.metadata.to_buffer_dict(default_buffer_prototype())[".zarray"].to_bytes()
299299
)
300300
assert "checksum" not in metadata["compressor"]
301+
302+
303+
def test_0_fill_str_type() -> None:
    """A ``fill_value`` of 0 on a Zarr v2 ``str`` array reads back as empty strings."""
    array = zarr.create_array(
        store=zarr.storage.MemoryStore(),
        dtype=str,
        shape=(5,),
        chunks=(2,),
        fill_value=0,
        zarr_format=2,
        overwrite=True,
    )

    # Nothing has been written to the array, so every element read back here
    # is the fill value.
    np.testing.assert_array_equal(array[:], ["", "", "", "", ""])

0 commit comments

Comments
 (0)