Skip to content

Commit ac9f8d5

Browse files
committed
codec.from_dict does not select endian automatically
1 parent 72a28e2 commit ac9f8d5

File tree

4 files changed

+29
-62
lines changed

4 files changed

+29
-62
lines changed

src/zarr/codecs/bytes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def from_dict(cls, data: dict[str, JSON]) -> Self:
4747
data, "bytes", require_configuration=False
4848
)
4949
configuration_parsed = configuration_parsed or {}
50+
configuration_parsed.setdefault("endian", None)
5051
return cls(**configuration_parsed) # type: ignore[arg-type]
5152

5253
def to_dict(self) -> dict[str, JSON]:

src/zarr/codecs/sharding.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ def __setstate__(self, state: dict[str, Any]) -> None:
378378
@classmethod
379379
def from_dict(cls, data: dict[str, JSON]) -> Self:
380380
_, configuration_parsed = parse_named_configuration(data, "sharding_indexed")
381+
configuration_parsed.setdefault(
382+
"codecs",
383+
(BytesCodec(endian=None),),
384+
)
385+
configuration_parsed.setdefault("index_codecs", (BytesCodec(endian=None), Crc32cCodec()))
381386
return cls(**configuration_parsed) # type: ignore[arg-type]
382387

383388
@property

src/zarr/core/metadata/v3.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,7 @@ def parse_node_type_array(data: object) -> Literal["array"]:
6464
raise NodeTypeValidationError("node_type", "array", data)
6565

6666

67-
def parse_codecs(data: object, data_type_has_endianness: bool) -> tuple[Codec, ...]:
68-
"""
69-
if data type has endianness, then codecs must specify endian attribute
70-
"""
67+
def parse_codecs(data: object) -> tuple[Codec, ...]:
7168
out: tuple[Codec, ...] = ()
7269

7370
if not isinstance(data, Iterable):
@@ -84,15 +81,6 @@ def parse_codecs(data: object, data_type_has_endianness: bool) -> tuple[Codec, .
8481
c = {"name": c}
8582
name_parsed, config_parsed = parse_named_configuration(c, require_configuration=False)
8683
codec = get_codec_class(name_parsed).from_dict(c)
87-
88-
if (
89-
hasattr(codec, "endian")
90-
and data_type_has_endianness
91-
and (config_parsed is None or "endian" not in config_parsed)
92-
):
93-
raise ValueError(
94-
f"Expected {name_parsed} codec to specify argument endian for data types for which endianness is applicable."
95-
)
9684
out += (codec,)
9785

9886
return out
@@ -295,9 +283,7 @@ def __init__(
295283
fill_value, dtype=cast(ALL_DTYPES, data_type_parsed.value)
296284
)
297285
attributes_parsed = parse_attributes(attributes)
298-
codecs_parsed_partial = parse_codecs(
299-
codecs, data_type_has_endianness=data_type_parsed.has_endianness
300-
)
286+
codecs_parsed_partial = parse_codecs(codecs)
301287
storage_transformers_parsed = parse_storage_transformers(storage_transformers)
302288

303289
array_spec = ArraySpec(

tests/test_metadata/test_v3.py

Lines changed: 21 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -454,60 +454,35 @@ def test_string_codecs() -> None:
454454

455455

456456
def test_codec_requires_endian() -> None:
457-
with pytest.raises(
458-
ValueError,
459-
match="Expected bytes codec to specify argument endian for data types for which endianness is applicable.",
460-
):
457+
raise_msg = "The `endian` configuration needs to be specified for multi-byte data types."
458+
bytes_codec_no_conf = [{"name": "bytes"}]
459+
with pytest.raises(ValueError, match=raise_msg):
461460
ArrayV3Metadata.from_dict(
462-
default_metadata_dict(data_type="int16", codecs=[{"name": "bytes"}])
461+
default_metadata_dict(data_type="int16", codecs=bytes_codec_no_conf)
463462
)
464-
465-
with pytest.raises(
466-
ValueError,
467-
match="Expected bytes codec to specify argument endian for data types for which endianness is applicable.",
468-
):
463+
bytes_codec_empty_conf = [{"name": "bytes", "configuration": {}}]
464+
with pytest.raises(ValueError, match=raise_msg):
469465
ArrayV3Metadata.from_dict(
470-
default_metadata_dict(
471-
data_type="int16", codecs=[{"name": "bytes", "configuration": {}}]
472-
)
466+
default_metadata_dict(data_type="int16", codecs=bytes_codec_empty_conf)
473467
)
474-
468+
bytes_codec_with_endian = [{"name": "bytes", "configuration": {"endian": "little"}}]
475469
ArrayV3Metadata.from_dict(
476-
default_metadata_dict(
477-
data_type="int16", codecs=[{"name": "bytes", "configuration": {"endian": "little"}}]
478-
)
470+
default_metadata_dict(data_type="int16", codecs=bytes_codec_with_endian)
479471
)
480-
472+
sharding_codec_with_endian = [
473+
{
474+
"name": "sharding_indexed",
475+
"configuration": {"chunk_shape": (1,), "codecs": bytes_codec_with_endian},
476+
}
477+
]
481478
ArrayV3Metadata.from_dict(
482-
default_metadata_dict(
483-
data_type="int16",
484-
codecs=[
485-
{
486-
"name": "sharding_indexed",
487-
"configuration": {
488-
"chunk_shape": (1,),
489-
"codecs": [{"name": "bytes", "configuration": {"endian": "little"}}]
490-
},
491-
}
492-
],
493-
)
479+
default_metadata_dict(data_type="int16", codecs=sharding_codec_with_endian)
494480
)
481+
sharding_codec_no_endian = [
482+
{"name": "sharding_indexed", "configuration": {"chunk_shape": (1,)}}
483+
]
495484

496-
#TODO
497-
with pytest.raises(
498-
ValueError,
499-
match="Expected bytes codec to specify argument endian for data types for which endianness is applicable.",
500-
):
485+
with pytest.raises(ValueError, match=raise_msg):
501486
ArrayV3Metadata.from_dict(
502-
default_metadata_dict(
503-
data_type="int16",
504-
codecs=[
505-
{
506-
"name": "sharding_indexed",
507-
"configuration": {
508-
"chunk_shape": (1,),
509-
},
510-
}
511-
],
512-
)
487+
default_metadata_dict(data_type="int16", codecs=sharding_codec_no_endian)
513488
)

0 commit comments

Comments
 (0)