Skip to content

Commit c84fe50

Browse files
committed
DOC: Add memory-mapping example to storage guide
Fixes #1245 Add documentation and tests for memory-mapped store, focusing on efficient access to small slices from large uncompressed chunks.
1 parent e9772ac commit c84fe50

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

docs/user-guide/storage.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,37 @@ Zarr data (metadata and chunks) to a dictionary.:
9999
>>> zarr.create_array(store=store, shape=(2,), dtype='float64')
100100
<Array memory://... shape=(2,) dtype=float64>
101101

102+
Memory-Mapped Store
103+
~~~~~~~~~~~~~~~~~~~~
104+
105+
For performance optimization when working with uncompressed data, you can create a memory-mapped store by subclassing :class:`zarr.storage.LocalStore`.
106+
Memory mapping allows direct access to portions of chunk data without loading entire chunks into memory, which can be beneficial when you need to
107+
read small slices from large chunks.:
108+
109+
>>> import mmap
110+
>>> from zarr.storage import LocalStore
111+
>>>
112+
>>> class MemoryMappedDirectoryStore(LocalStore):
113+
...     def _fromfile(self, fn):
114+
...         with open(fn, "rb") as fh:
115+
...             return memoryview(mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ))
116+
>>>
117+
>>> # Open an existing array through a memory-mapped store
118+
>>> store = MemoryMappedDirectoryStore('data/example.zarr')
119+
>>> z = zarr.open_array(store=store)
120+
121+
For example, if you have an array with large 1000x1000 chunks and frequently need to access small 100x100 sections,
122+
memory mapping can provide efficient access by mapping only the needed portions into memory
123+
rather than loading entire chunks.:
124+
125+
>>> # Create an array with large chunks
126+
>>> z = zarr.create_array('data/example.zarr', shape=(10000, 10000), chunks=(1000, 1000), dtype='float64', compressors=None)
127+
>>> # Later, open with memory mapping for efficient chunk access
128+
>>> mmap_store = MemoryMappedDirectoryStore('data/example.zarr')
129+
>>> z = zarr.open_array(store=mmap_store)
130+
>>> # Access specific chunks efficiently
131+
>>> chunk_data = z[500:600, 500:600] # Only maps the needed chunks into memory
132+
102133
.. _user-guide-custom-stores:
103134

104135
Developing custom stores

tests/test_store/test_mmap.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
import mmap
6+
import pytest
7+
8+
import zarr
9+
from zarr.core.buffer import Buffer, cpu
10+
from zarr.storage import LocalStore
11+
from zarr.testing.store import StoreTests
12+
13+
if TYPE_CHECKING:
14+
import pathlib
15+
16+
17+
class MemoryMappedDirectoryStore(LocalStore):
    """``LocalStore`` subclass whose ``_fromfile`` hook reads files via ``mmap``.

    NOTE(review): this only has an effect if ``LocalStore`` actually routes
    file reads through ``_fromfile``; that call site is not visible here and
    should be confirmed against the store implementation.
    """

    def _fromfile(self, fn):
        """Return a read-only ``memoryview`` over the contents of file ``fn``.

        The view is backed by a memory map, so pages are only loaded when the
        corresponding bytes are accessed. Zero-length files yield an empty
        view because ``mmap.mmap`` refuses to map an empty file.
        """
        import os

        with open(fn, "rb") as fh:
            if os.fstat(fh.fileno()).st_size == 0:
                # mmap raises ValueError for empty files; return an empty view instead.
                return memoryview(b"")
            # ``access=ACCESS_READ`` works on every platform, whereas the
            # ``prot=`` keyword is only accepted on Unix. The mapping stays
            # valid after ``fh`` is closed.
            return memoryview(mmap.mmap(fh.fileno(), 0, access=mmap.ACCESS_READ))
21+
22+
23+
class TestMemoryMappedDirectoryStore(StoreTests[MemoryMappedDirectoryStore, cpu.Buffer]):
    """Run the shared ``StoreTests`` suite against ``MemoryMappedDirectoryStore``."""

    store_cls = MemoryMappedDirectoryStore
    buffer_cls = cpu.Buffer

    async def get(self, store: MemoryMappedDirectoryStore, key: str) -> Buffer:
        """Read ``key`` directly from the store's root directory on disk."""
        return self.buffer_cls.from_bytes((store.root / key).read_bytes())

    async def set(self, store: MemoryMappedDirectoryStore, key: str, value: Buffer) -> None:
        """Write ``value`` for ``key`` directly under the store's root directory."""
        target = store.root / key
        # exist_ok avoids the check-then-create race of ``if not parent.exists()``.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(value.to_bytes())

    @pytest.fixture
    def store_kwargs(self, tmpdir) -> dict[str, str]:
        """Keyword arguments used to build the store: a fresh temporary root."""
        return {"root": str(tmpdir)}

    def test_store_repr(self, store: MemoryMappedDirectoryStore) -> None:
        assert str(store) == f"file://{store.root.as_posix()}"

    def test_store_supports_writes(self, store: MemoryMappedDirectoryStore) -> None:
        assert store.supports_writes

    def test_store_supports_partial_writes(self, store: MemoryMappedDirectoryStore) -> None:
        assert store.supports_partial_writes

    def test_store_supports_listing(self, store: MemoryMappedDirectoryStore) -> None:
        assert store.supports_listing

    async def test_empty_with_empty_subdir(self, store: MemoryMappedDirectoryStore) -> None:
        """A store containing only empty directories still counts as empty."""
        assert await store.is_empty("")
        (store.root / "foo/bar").mkdir(parents=True)
        assert await store.is_empty("")

    def test_creates_new_directory(self, tmp_path: pathlib.Path) -> None:
        """Creating a group in a store rooted at a missing path creates that path."""
        target = tmp_path.joinpath("a", "b", "c")
        assert not target.exists()

        store = self.store_cls(root=target)
        zarr.group(store=store)
        # Without this check the test would pass even if nothing was created.
        assert target.exists()

    async def test_mmap_slice_reads(self, store: MemoryMappedDirectoryStore) -> None:
        """Test reading slices with memory mapping"""
        # Create an uncompressed array with large chunks; memory mapping is
        # only meaningful when chunk bytes on disk are the raw array bytes.
        z = zarr.create_array(
            store=store,
            shape=(2000, 2000),
            chunks=(1000, 1000),
            dtype="float64",
            compressors=None,
        )
        # Write test data
        data = zarr.full(
            shape=(2000, 2000), chunks=(1000, 1000), fill_value=42.0, dtype="float64"
        )
        z[:] = data[:]

        # Test reading various slices
        slices = [
            # Within single chunk
            (slice(100, 200), slice(100, 200)),
            # Across chunk boundaries
            (slice(900, 1100), slice(900, 1100)),
            # Full chunk
            (slice(0, 1000), slice(0, 1000)),
        ]

        for test_slice in slices:
            assert (z[test_slice] == data[test_slice]).all()

0 commit comments

Comments
 (0)