From 610689ed896e1abaf3b0ab719a928eebb4bba9de Mon Sep 17 00:00:00 2001 From: Elliott Sales de Andrade Date: Tue, 21 Jan 2025 04:11:54 -0500 Subject: [PATCH] Use unsigned bytes to back Buffer This makes compressors consistent with v2, and buffers consistent with `bytes` types. Fixes #2735 --- src/zarr/codecs/bytes.py | 2 +- src/zarr/codecs/crc32c_.py | 2 +- src/zarr/core/buffer/core.py | 4 ++-- src/zarr/core/buffer/cpu.py | 8 ++++---- src/zarr/core/buffer/gpu.py | 10 +++++----- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py index 78c7b22fbc..750707d36a 100644 --- a/src/zarr/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -114,7 +114,7 @@ async def _encode_single( nd_array = chunk_array.as_ndarray_like() # Flatten the nd-array (only copy if needed) and reinterpret as bytes - nd_array = nd_array.ravel().view(dtype="b") + nd_array = nd_array.ravel().view(dtype="B") return chunk_spec.prototype.buffer.from_array_like(nd_array) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py index 3a6624ad25..ab8a57eba7 100644 --- a/src/zarr/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -57,7 +57,7 @@ async def _encode_single( # Calculate the checksum and "cast" it to a numpy array checksum = np.array([crc32c(cast(typing_extensions.Buffer, data))], dtype=np.uint32) # Append the checksum (as bytes) to the data - return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("b"))) + return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B"))) def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int: return input_byte_length + 4 diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ccab103e0f..05ce91acc2 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -139,7 +139,7 @@ class
Buffer(ABC): def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") - if array_like.dtype != np.dtype("b"): + if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") self._data = array_like @@ -302,7 +302,7 @@ class NDBuffer: Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 5019075496..dda2282bc1 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None: @classmethod def create_zero_length(cls) -> Self: - return cls(np.array([], dtype="b")) + return cls(np.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: @@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self: ------- New buffer representing `bytes_like` """ - return cls.from_array_like(np.frombuffer(bytes_like, dtype="b")) + return cls.from_array_like(np.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: """Returns the buffer as a NumPy array (host memory). @@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self: """Concatenate two buffers""" other_array = other.as_array_like() - assert other_array.dtype == np.dtype("b") + assert other_array.dtype == np.dtype("B") return self.__class__( np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array))) ) @@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer): Notes ----- The two buffer classes Buffer and NDBuffer are very similar. 
In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate. diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py index 6941c8897e..fbfebedaf4 100644 --- a/src/zarr/core/buffer/gpu.py +++ b/src/zarr/core/buffer/gpu.py @@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None: if array_like.ndim != 1: raise ValueError("array_like: only 1-dim allowed") - if array_like.dtype != np.dtype("b"): + if array_like.dtype != np.dtype("B"): raise ValueError("array_like: only byte dtype allowed") if not hasattr(array_like, "__cuda_array_interface__"): @@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self: ------- New empty 0-length buffer """ - return cls(cp.array([], dtype="b")) + return cls(cp.array([], dtype="B")) @classmethod def from_buffer(cls, buffer: core.Buffer) -> Self: @@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self: @classmethod def from_bytes(cls, bytes_like: BytesLike) -> Self: - return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b")) + return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B")) def as_numpy_array(self) -> npt.NDArray[Any]: return cast(npt.NDArray[Any], cp.asnumpy(self._data)) def __add__(self, other: core.Buffer) -> Self: other_array = other.as_array_like() - assert other_array.dtype == np.dtype("b") + assert other_array.dtype == np.dtype("B") gpu_other = Buffer(other_array) gpu_other_array = gpu_other.as_array_like() return self.__class__( @@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer): Notes ----- The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer - is a special case of NDBuffer where dim=1, stride=1, and dtype="b". 
However, + is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However, in order to use Python's type system to differentiate between the contiguous Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the two classes separate.