From 610689ed896e1abaf3b0ab719a928eebb4bba9de Mon Sep 17 00:00:00 2001
From: Elliott Sales de Andrade <quantum.analyst@gmail.com>
Date: Tue, 21 Jan 2025 04:11:54 -0500
Subject: [PATCH] Use unsigned bytes to back Buffer

This makes compressors consistent with v2, and buffers consistent with
`bytes` types.

Fixes #2735
---
 src/zarr/codecs/bytes.py     |  2 +-
 src/zarr/codecs/crc32c_.py   |  2 +-
 src/zarr/core/buffer/core.py |  4 ++--
 src/zarr/core/buffer/cpu.py  |  8 ++++----
 src/zarr/core/buffer/gpu.py  | 10 +++++-----
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/zarr/codecs/bytes.py b/src/zarr/codecs/bytes.py
index 78c7b22fbc..750707d36a 100644
--- a/src/zarr/codecs/bytes.py
+++ b/src/zarr/codecs/bytes.py
@@ -114,7 +114,7 @@ async def _encode_single(
 
         nd_array = chunk_array.as_ndarray_like()
         # Flatten the nd-array (only copy if needed) and reinterpret as bytes
-        nd_array = nd_array.ravel().view(dtype="b")
+        nd_array = nd_array.ravel().view(dtype="B")
         return chunk_spec.prototype.buffer.from_array_like(nd_array)
 
     def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
diff --git a/src/zarr/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py
index 3a6624ad25..ab8a57eba7 100644
--- a/src/zarr/codecs/crc32c_.py
+++ b/src/zarr/codecs/crc32c_.py
@@ -57,7 +57,7 @@ async def _encode_single(
         # Calculate the checksum and "cast" it to a numpy array
         checksum = np.array([crc32c(cast(typing_extensions.Buffer, data))], dtype=np.uint32)
         # Append the checksum (as bytes) to the data
-        return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("b")))
+        return chunk_spec.prototype.buffer.from_array_like(np.append(data, checksum.view("B")))
 
     def compute_encoded_size(self, input_byte_length: int, _chunk_spec: ArraySpec) -> int:
         return input_byte_length + 4
diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py
index ccab103e0f..05ce91acc2 100644
--- a/src/zarr/core/buffer/core.py
+++ b/src/zarr/core/buffer/core.py
@@ -139,7 +139,7 @@ class Buffer(ABC):
     def __init__(self, array_like: ArrayLike) -> None:
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
         self._data = array_like
 
@@ -302,7 +302,7 @@ class NDBuffer:
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py
index 5019075496..dda2282bc1 100644
--- a/src/zarr/core/buffer/cpu.py
+++ b/src/zarr/core/buffer/cpu.py
@@ -49,7 +49,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
     @classmethod
     def create_zero_length(cls) -> Self:
-        return cls(np.array([], dtype="b"))
+        return cls(np.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -92,7 +92,7 @@ def from_bytes(cls, bytes_like: BytesLike) -> Self:
         -------
             New buffer representing `bytes_like`
         """
-        return cls.from_array_like(np.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(np.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         """Returns the buffer as a NumPy array (host memory).
@@ -111,7 +111,7 @@ def __add__(self, other: core.Buffer) -> Self:
         """Concatenate two buffers"""
 
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         return self.__class__(
             np.concatenate((np.asanyarray(self._data), np.asanyarray(other_array)))
         )
@@ -131,7 +131,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
index 6941c8897e..fbfebedaf4 100644
--- a/src/zarr/core/buffer/gpu.py
+++ b/src/zarr/core/buffer/gpu.py
@@ -55,7 +55,7 @@ def __init__(self, array_like: ArrayLike) -> None:
 
         if array_like.ndim != 1:
             raise ValueError("array_like: only 1-dim allowed")
-        if array_like.dtype != np.dtype("b"):
+        if array_like.dtype != np.dtype("B"):
             raise ValueError("array_like: only byte dtype allowed")
 
         if not hasattr(array_like, "__cuda_array_interface__"):
@@ -80,7 +80,7 @@ def create_zero_length(cls) -> Self:
         -------
             New empty 0-length buffer
         """
-        return cls(cp.array([], dtype="b"))
+        return cls(cp.array([], dtype="B"))
 
     @classmethod
     def from_buffer(cls, buffer: core.Buffer) -> Self:
@@ -96,14 +96,14 @@ def from_buffer(cls, buffer: core.Buffer) -> Self:
 
     @classmethod
     def from_bytes(cls, bytes_like: BytesLike) -> Self:
-        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="b"))
+        return cls.from_array_like(cp.frombuffer(bytes_like, dtype="B"))
 
     def as_numpy_array(self) -> npt.NDArray[Any]:
         return cast(npt.NDArray[Any], cp.asnumpy(self._data))
 
     def __add__(self, other: core.Buffer) -> Self:
         other_array = other.as_array_like()
-        assert other_array.dtype == np.dtype("b")
+        assert other_array.dtype == np.dtype("B")
         gpu_other = Buffer(other_array)
         gpu_other_array = gpu_other.as_array_like()
         return self.__class__(
@@ -125,7 +125,7 @@ class NDBuffer(core.NDBuffer):
     Notes
     -----
     The two buffer classes Buffer and NDBuffer are very similar. In fact, Buffer
-    is a special case of NDBuffer where dim=1, stride=1, and dtype="b". However,
+    is a special case of NDBuffer where dim=1, stride=1, and dtype="B". However,
     in order to use Python's type system to differentiate between the contiguous
     Buffer and the n-dim (non-contiguous) NDBuffer, we keep the definition of the
     two classes separate.