Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3559.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Create `Bytes`, a new data type for variable-length bytes. This data type is a drop-in replacement for `VariableLengthBytes` that complies with the published [`Bytes`](https://github.com/zarr-developers/zarr-extensions/tree/main/data-types/bytes) data type spec.
2 changes: 1 addition & 1 deletion examples/custom_dtype/custom_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
from zarr.core.common import JSON, ZarrFormat
from zarr.core.dtype import ZDType, data_type_registry
from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeConfig_V2,
DTypeJSON,
check_dtype_spec_v2,
)
from zarr.errors import DataTypeValidationError

# This is the int2 array data type
int2_dtype_cls = type(np.dtype("int2"))
Expand Down
47 changes: 42 additions & 5 deletions src/zarr/core/dtype/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
from typing import TYPE_CHECKING, Final, TypeAlias

from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeJSON,
)
from zarr.core.dtype.npy.bool import Bool
from zarr.core.dtype.npy.bytes import (
Bytes,
BytesJSON_V2,
BytesJSON_V3,
NullTerminatedBytes,
NullterminatedBytesJSON_V2,
NullTerminatedBytesJSON_V3,
Expand All @@ -30,6 +32,7 @@
TimeDelta64JSON_V2,
TimeDelta64JSON_V3,
)
from zarr.errors import DataTypeValidationError

if TYPE_CHECKING:
from zarr.core.common import ZarrFormat
Expand All @@ -52,8 +55,12 @@

__all__ = [
"Bool",
"Bytes",
"BytesJSON_V2",
"BytesJSON_V3",
"Complex64",
"Complex128",
"DTypeJSON",
"DataTypeRegistry",
"DataTypeValidationError",
"DateTime64",
Expand Down Expand Up @@ -94,6 +101,8 @@
"VariableLengthUTF8JSON_V2",
"ZDType",
"data_type_registry",
"disable_legacy_bytes_dtype",
"enable_legacy_bytes_dtype",
"parse_data_type",
"parse_dtype",
]
Expand All @@ -115,8 +124,8 @@
TimeDType = DateTime64 | TimeDelta64
TIME_DTYPE: Final = DateTime64, TimeDelta64

BytesDType = RawBytes | NullTerminatedBytes | VariableLengthBytes
BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, VariableLengthBytes
BytesDType = RawBytes | NullTerminatedBytes | Bytes
BYTES_DTYPE: Final = RawBytes, NullTerminatedBytes, Bytes

AnyDType = (
Bool
Expand All @@ -127,7 +136,6 @@
| BytesDType
| Structured
| TimeDType
| VariableLengthBytes
)
# mypy has trouble inferring the type of variablelengthstring dtype, because its class definition
# depends on the installed numpy version. That's why the type: ignore statement is needed here.
Expand All @@ -140,7 +148,6 @@
*BYTES_DTYPE,
Structured,
*TIME_DTYPE,
VariableLengthBytes,
)

# These are aliases for variable-length UTF-8 strings
Expand Down Expand Up @@ -277,6 +284,36 @@ def parse_dtype(
# If the dtype request is one of the aliases for variable-length UTF-8 strings,
# return that dtype.
return VariableLengthUTF8() # type: ignore[return-value]
if dtype_spec is bytes:
# Treat the bytes type as a request for the Bytes dtype
return Bytes()
Comment on lines +287 to +289
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flagging this change: `parse_dtype(bytes, zarr_format=3)` will now return an instance of `Bytes`.


# otherwise, we have either a numpy dtype string, or a zarr v3 dtype string, and in either case
# we can create a native dtype from it, and do the dtype inference from that
return get_data_type_from_native_dtype(dtype_spec) # type: ignore[arg-type]


def enable_legacy_bytes_dtype() -> None:
    """
    Swap the registered ``Bytes`` data type for the legacy ``VariableLengthBytes`` one.

    The swap only happens when the registry currently has a ``"bytes"`` entry and no
    ``"variable_length_bytes"`` entry; in any other state the registry is left untouched.
    Intended for backwards compatibility.
    """
    registered = data_type_registry.contents
    if "bytes" not in registered or "variable_length_bytes" in registered:
        # Nothing to swap: the new dtype is not registered, or the legacy one already is.
        return
    data_type_registry.unregister("bytes")
    data_type_registry.register("variable_length_bytes", VariableLengthBytes)


def disable_legacy_bytes_dtype() -> None:
    """
    Swap the registered legacy ``VariableLengthBytes`` data type back for ``Bytes``.

    The swap only happens when the registry currently has a ``"variable_length_bytes"``
    entry and no ``"bytes"`` entry; in any other state the registry is left untouched.
    Intended to reverse the effect of ``enable_legacy_bytes_dtype``.
    """
    registered = data_type_registry.contents
    if "variable_length_bytes" not in registered or "bytes" in registered:
        # Nothing to swap: the legacy dtype is not registered, or the new one already is.
        return
    data_type_registry.unregister("variable_length_bytes")
    data_type_registry.register("bytes", Bytes)
Comment on lines +296 to +319
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

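These functions let users effectively opt out of the changes in this PR: `enable_legacy_bytes_dtype` replaces the registered `Bytes` data type with `VariableLengthBytes`, and `disable_legacy_bytes_dtype` reverses that.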

6 changes: 0 additions & 6 deletions src/zarr/core/dtype/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,12 +151,6 @@ def unpack_dtype_json(data: DTypeSpec_V2 | DTypeSpec_V3) -> DTypeJSON:
return data


class DataTypeValidationError(ValueError):
    """A ``ValueError`` subclass that adds no behavior of its own."""


class ScalarTypeValidationError(ValueError):
    """A ``ValueError`` subclass that adds no behavior of its own."""


Comment on lines -154 to -159
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These errors were moved to the main errors module (`zarr.errors`).

@dataclass(frozen=True, kw_only=True)
class HasLength:
"""
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/core/dtype/npy/bool.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import numpy as np

from zarr.core.dtype.common import (
DataTypeValidationError,
DTypeConfig_V2,
DTypeJSON,
HasItemSize,
check_dtype_spec_v2,
)
from zarr.core.dtype.wrapper import TBaseDType, ZDType
from zarr.errors import DataTypeValidationError

if TYPE_CHECKING:
from zarr.core.common import JSON, ZarrFormat
Expand Down
Loading