From 03bd5696e99587f64656d416f15a18da34a496a6 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 7 Apr 2025 15:31:22 +0100 Subject: [PATCH 1/5] Avoid memory copy in obstore write --- src/zarr/storage/_obstore.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py index 79afa08d15..92db9373f5 100644 --- a/src/zarr/storage/_obstore.py +++ b/src/zarr/storage/_obstore.py @@ -7,6 +7,8 @@ from collections.abc import Iterable from typing import TYPE_CHECKING, Any, TypedDict +import numpy as np + from zarr.abc.store import ( ByteRequest, OffsetByteRequest, @@ -145,7 +147,7 @@ async def set(self, key: str, value: Buffer) -> None: self._check_writable() - buf = value.to_bytes() + buf = value.as_numpy_array().view(np.uint8) await obs.put_async(self.store, key, buf) async def set_if_not_exists(self, key: str, value: Buffer) -> None: @@ -153,7 +155,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: import obstore as obs self._check_writable() - buf = value.to_bytes() + buf = value.as_numpy_array().view(np.uint8) with contextlib.suppress(obs.exceptions.AlreadyExistsError): await obs.put_async(self.store, key, buf, mode="create") From ce25ec3c437e710db5a0f5f38f2bd28573ba22dd Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 8 Apr 2025 11:53:41 +0100 Subject: [PATCH 2/5] Add as_bytes_like method to Buffer --- src/zarr/core/buffer/core.py | 13 +++++++++++++ src/zarr/storage/_obstore.py | 6 ++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index ccab103e0f..a7af38a904 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -251,6 +251,19 @@ def as_numpy_array(self) -> npt.NDArray[Any]: """ ... + def as_bytes_like(self) -> BytesLike: + """Returns the buffer as a bytes-like object. + + Notes + ----- + Might have to copy data, since the implementation uses `.as_numpy_array()`. + + Returns + ------- + A bytes-like object that implements the Python buffer protocol + """ + return memoryview(self.as_numpy_array().view(np.uint8)) # type: ignore[arg-type] + def to_bytes(self) -> bytes: """Returns the buffer as `bytes` (host memory). diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py index 92db9373f5..be8c7a7429 100644 --- a/src/zarr/storage/_obstore.py +++ b/src/zarr/storage/_obstore.py @@ -7,8 +7,6 @@ from collections.abc import Iterable from typing import TYPE_CHECKING, Any, TypedDict -import numpy as np - from zarr.abc.store import ( ByteRequest, OffsetByteRequest, @@ -147,7 +145,7 @@ async def set(self, key: str, value: Buffer) -> None: self._check_writable() - buf = value.as_numpy_array().view(np.uint8) + buf = value.as_bytes_like() await obs.put_async(self.store, key, buf) async def set_if_not_exists(self, key: str, value: Buffer) -> None: @@ -155,7 +153,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: import obstore as obs self._check_writable() - buf = value.as_numpy_array().view(np.uint8) + buf = value.as_bytes_like() with contextlib.suppress(obs.exceptions.AlreadyExistsError): await obs.put_async(self.store, key, buf, mode="create") From a48b8bd4d5830c9ba11777daa5bac398807667a1 Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 9 Apr 2025 09:56:37 +0100 Subject: [PATCH 3/5] Add changelog entry --- changes/2972.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2972.misc.rst diff --git a/changes/2972.misc.rst b/changes/2972.misc.rst new file mode 100644 index 0000000000..f0258c1d05 --- /dev/null +++ b/changes/2972.misc.rst @@ -0,0 +1 @@ +Avoid an unnecessary memory copy when writing Zarr with obstore From bf3c7131cee3ed48feb908c8e44e0a82f8dae67a Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 21 Apr 2025 15:31:08 +0100 Subject: [PATCH 4/5] No need to take unsigned bytes view following #2738 --- src/zarr/core/buffer/core.py | 2 +- src/zarr/storage/_local.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index de37d6f333..591ad02711 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -266,7 +266,7 @@ def as_bytes_like(self) -> BytesLike: ------- A bytes-like object that implements the Python buffer protocol """ - return memoryview(self.as_numpy_array().view(np.uint8)) # type: ignore[arg-type] + return memoryview(self.as_numpy_array()) # type: ignore[arg-type] def to_bytes(self) -> bytes: """Returns the buffer as `bytes` (host memory). diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index bd5bfc1da2..6c70ed8e62 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -52,10 +52,10 @@ def _put( with path.open("r+b") as f: f.seek(start) # write takes any object supporting the buffer protocol - f.write(value.as_numpy_array()) # type: ignore[arg-type] + f.write(value.as_bytes_like()) return None else: - view = memoryview(value.as_numpy_array()) # type: ignore[arg-type] + view = value.as_bytes_like() if exclusive: mode = "xb" else: From 5683b24c49ae254671854a016ae5699823c57888 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 22 Apr 2025 14:06:36 +0100 Subject: [PATCH 5/5] Change method name to `as_buffer_like` --- src/zarr/core/buffer/core.py | 6 +++--- src/zarr/storage/_local.py | 4 ++-- src/zarr/storage/_obstore.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 591ad02711..eaafa4c714 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -255,8 +255,8 @@ def as_numpy_array(self) -> npt.NDArray[Any]: """ ... - def as_bytes_like(self) -> BytesLike: - """Returns the buffer as a bytes-like object. + def as_buffer_like(self) -> BytesLike: + """Returns the buffer as an object that implements the Python buffer protocol. Notes ----- @@ -264,7 +264,7 @@ def as_bytes_like(self) -> BytesLike: Returns ------- - A bytes-like object that implements the Python buffer protocol + An object that implements the Python buffer protocol """ return memoryview(self.as_numpy_array()) # type: ignore[arg-type] diff --git a/src/zarr/storage/_local.py b/src/zarr/storage/_local.py index 6c70ed8e62..b46c263333 100644 --- a/src/zarr/storage/_local.py +++ b/src/zarr/storage/_local.py @@ -52,10 +52,10 @@ def _put( with path.open("r+b") as f: f.seek(start) # write takes any object supporting the buffer protocol - f.write(value.as_bytes_like()) + f.write(value.as_buffer_like()) return None else: - view = value.as_bytes_like() + view = value.as_buffer_like() if exclusive: mode = "xb" else: diff --git a/src/zarr/storage/_obstore.py b/src/zarr/storage/_obstore.py index 5d8874bc62..e3d8d9cee6 100644 --- a/src/zarr/storage/_obstore.py +++ b/src/zarr/storage/_obstore.py @@ -160,7 +160,7 @@ async def set(self, key: str, value: Buffer) -> None: self._check_writable() - buf = value.as_bytes_like() + buf = value.as_buffer_like() await obs.put_async(self.store, key, buf) async def set_if_not_exists(self, key: str, value: Buffer) -> None: @@ -168,7 +168,7 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: import obstore as obs self._check_writable() - buf = value.as_bytes_like() + buf = value.as_buffer_like() with contextlib.suppress(obs.exceptions.AlreadyExistsError): await obs.put_async(self.store, key, buf, mode="create")