Skip to content

Commit 3d21514

Browse files
committed
adjust code examples (but we don't know if they work, because we don't have doctests working)
1 parent 56db161 commit 3d21514

File tree

1 file changed

+70
-41
lines changed

1 file changed

+70
-41
lines changed

docs/user-guide/experimental.md

Lines changed: 70 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ This section contains documentation for experimental Zarr Python features. The f
44

55
## `CacheStore`
66

7-
Zarr Python 3.1.4 adds `zarr.storage.CacheStore` provides a dual-store caching implementation
7+
Zarr Python 3.1.4 adds `zarr.experimental.cache_store.CacheStore`, which provides a dual-store caching implementation
88
that can be wrapped around any Zarr store to improve performance for repeated data access.
99
This is particularly useful when working with remote stores (e.g., S3, HTTP) where network
1010
latency can significantly impact data access speed.
@@ -24,15 +24,16 @@ Because the `CacheStore` uses an ordinary Zarr `Store` object as the caching lay
2424
Creating a CacheStore requires both a source store and a cache store. The cache store
2525
can be any Store implementation, providing flexibility in cache persistence:
2626

27-
```python
27+
```python exec="true" session="experimental" source="above" result="ansi"
2828
import zarr
2929
import zarr.storage
3030
import numpy as np
31+
from zarr.experimental.cache_store import CacheStore
3132

3233
# Create a local store and a separate cache store
3334
source_store = zarr.storage.LocalStore('test.zarr')
3435
cache_store = zarr.storage.MemoryStore() # In-memory cache
35-
cached_store = zarr.storage.CacheStore(
36+
cached_store = CacheStore(
3637
store=source_store,
3738
cache_store=cache_store,
3839
max_size=256*1024*1024 # 256MB cache
@@ -52,7 +53,7 @@ such as a remote store for source data and a local store for persistent caching.
5253

5354
The CacheStore provides significant performance improvements for repeated data access:
5455

55-
```python
56+
```python exec="true" session="experimental" source="above" result="ansi"
5657
import time
5758

5859
# Benchmark reading with cache
@@ -80,23 +81,34 @@ The CacheStore is most beneficial when used with remote stores where network lat
8081
is a significant factor. You can use different store types for source and cache:
8182

8283
```python
83-
from zarr.storage import FsspecStore, LocalStore
84-
85-
# Create a remote store (S3 example) - for demonstration only
86-
remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
87-
88-
# Use a local store for persistent caching
89-
local_cache_store = LocalStore('cache_data')
90-
91-
# Create cached store with persistent local cache
92-
cached_store = zarr.storage.CacheStore(
93-
store=remote_store,
94-
cache_store=local_cache_store,
84+
# This example shows remote store setup but requires network access
85+
# from zarr.storage import FsspecStore, LocalStore
86+
87+
# # Create a remote store (S3 example) - for demonstration only
88+
# remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
89+
90+
# # Use a local store for persistent caching
91+
# local_cache_store = LocalStore('cache_data')
92+
93+
# # Create cached store with persistent local cache
94+
# cached_store = CacheStore(
95+
# store=remote_store,
96+
# cache_store=local_cache_store,
97+
# max_size=512*1024*1024 # 512MB cache
98+
# )
99+
100+
# # Open array through cached store
101+
# z = zarr.open(cached_store)
102+
103+
# For demonstration, use local stores instead
104+
from zarr.storage import LocalStore
105+
local_source = LocalStore('remote_data.zarr')
106+
local_cache = LocalStore('cache_data')
107+
cached_store = CacheStore(
108+
store=local_source,
109+
cache_store=local_cache,
95110
max_size=512*1024*1024 # 512MB cache
96111
)
97-
98-
# Open array through cached store
99-
z = zarr.open(cached_store)
100112
```
101113

102114
The first access to any chunk will be slow (network retrieval), but subsequent accesses
@@ -109,16 +121,16 @@ The CacheStore can be configured with several parameters:
109121

110122
**max_size**: Controls the maximum size of cached data in bytes
111123

112-
```python
124+
```python exec="true" session="experimental" source="above" result="ansi"
113125
# 256MB cache with size limit
114-
cache = zarr.storage.CacheStore(
126+
cache = CacheStore(
115127
store=source_store,
116128
cache_store=cache_store,
117129
max_size=256*1024*1024
118130
)
119131

120132
# Unlimited cache size (use with caution)
121-
cache = zarr.storage.CacheStore(
133+
cache = CacheStore(
122134
store=source_store,
123135
cache_store=cache_store,
124136
max_size=None
@@ -127,16 +139,16 @@ cache = zarr.storage.CacheStore(
127139

128140
**max_age_seconds**: Controls time-based cache expiration
129141

130-
```python
142+
```python exec="true" session="experimental" source="above" result="ansi"
131143
# Cache expires after 1 hour
132-
cache = zarr.storage.CacheStore(
144+
cache = CacheStore(
133145
store=source_store,
134146
cache_store=cache_store,
135147
max_age_seconds=3600
136148
)
137149

138150
# Cache never expires
139-
cache = zarr.storage.CacheStore(
151+
cache = CacheStore(
140152
store=source_store,
141153
cache_store=cache_store,
142154
max_age_seconds="infinity"
@@ -145,16 +157,16 @@ cache = zarr.storage.CacheStore(
145157

146158
**cache_set_data**: Controls whether written data is cached
147159

148-
```python
160+
```python exec="true" session="experimental" source="above" result="ansi"
149161
# Cache data when writing (default)
150-
cache = zarr.storage.CacheStore(
162+
cache = CacheStore(
151163
store=source_store,
152164
cache_store=cache_store,
153165
cache_set_data=True
154166
)
155167

156168
# Don't cache written data (read-only cache)
157-
cache = zarr.storage.CacheStore(
169+
cache = CacheStore(
158170
store=source_store,
159171
cache_store=cache_store,
160172
cache_set_data=False
@@ -165,7 +177,7 @@ cache = zarr.storage.CacheStore(
165177

166178
The CacheStore provides statistics to monitor cache performance and state:
167179

168-
```python
180+
```python exec="true" session="experimental" source="above" result="ansi"
169181
# Access some data to generate cache activity
170182
data = zarr_array[0:50, 0:50] # First access - cache miss
171183
data = zarr_array[0:50, 0:50] # Second access - cache hit
@@ -187,7 +199,7 @@ The `cache_info()` method returns a dictionary with detailed information about t
187199

188200
The CacheStore provides methods for manual cache management:
189201

190-
```python
202+
```python exec="true" session="experimental" source="above" result="ansi"
191203
# Clear all cached data and tracking information
192204
import asyncio
193205
asyncio.run(cached_store.clear_cache())
@@ -217,11 +229,12 @@ and use any store type for the cache backend:
217229

218230
### Local Store with Memory Cache
219231

220-
```python
232+
```python exec="true" session="experimental-memory-cache" source="above" result="ansi"
221233
from zarr.storage import LocalStore, MemoryStore
234+
from zarr.experimental.cache_store import CacheStore
222235
source_store = LocalStore('data.zarr')
223236
cache_store = MemoryStore()
224-
cached_store = zarr.storage.CacheStore(
237+
cached_store = CacheStore(
225238
store=source_store,
226239
cache_store=cache_store,
227240
max_size=128*1024*1024
@@ -230,12 +243,25 @@ cached_store = zarr.storage.CacheStore(
230243

231244
### Remote Store with Local Cache
232245

233-
```python
234-
from zarr.storage import FsspecStore, LocalStore
235-
remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
246+
```python exec="true" session="experimental-remote-cache" source="above" result="ansi"
247+
# Remote store example (commented out as it requires network access)
248+
# from zarr.storage import FsspecStore, LocalStore
249+
# remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
250+
# local_cache = LocalStore('local_cache')
251+
# cached_store = CacheStore(
252+
# store=remote_store,
253+
# cache_store=local_cache,
254+
# max_size=1024*1024*1024,
255+
# max_age_seconds=3600
256+
# )
257+
258+
# Local store example for demonstration
259+
from zarr.storage import LocalStore
260+
from zarr.experimental.cache_store import CacheStore
261+
remote_like_store = LocalStore('remote_like_data.zarr')
236262
local_cache = LocalStore('local_cache')
237-
cached_store = zarr.storage.CacheStore(
238-
store=remote_store,
263+
cached_store = CacheStore(
264+
store=remote_like_store,
239265
cache_store=local_cache,
240266
max_size=1024*1024*1024,
241267
max_age_seconds=3600
@@ -244,11 +270,12 @@ cached_store = zarr.storage.CacheStore(
244270

245271
### Memory Store with Persistent Cache
246272

247-
```python
273+
```python exec="true" session="experimental-local-cache" source="above" result="ansi"
248274
from zarr.storage import MemoryStore, LocalStore
275+
from zarr.experimental.cache_store import CacheStore
249276
memory_store = MemoryStore()
250277
persistent_cache = LocalStore('persistent_cache')
251-
cached_store = zarr.storage.CacheStore(
278+
cached_store = CacheStore(
252279
store=memory_store,
253280
cache_store=persistent_cache,
254281
max_size=256*1024*1024
@@ -262,16 +289,17 @@ of source and cache stores for your specific use case.
262289

263290
Here's a complete example demonstrating cache effectiveness:
264291

265-
```python
292+
```python exec="true" session="experimental-final" source="above" result="ansi"
266293
import zarr
267294
import zarr.storage
268295
import time
269296
import numpy as np
297+
from zarr.experimental.cache_store import CacheStore
270298

271299
# Create test data with dual-store cache
272300
source_store = zarr.storage.LocalStore('benchmark.zarr')
273301
cache_store = zarr.storage.MemoryStore()
274-
cached_store = zarr.storage.CacheStore(
302+
cached_store = CacheStore(
275303
store=source_store,
276304
cache_store=cache_store,
277305
max_size=256*1024*1024
@@ -292,6 +320,7 @@ second_access = time.time() - start
292320
info = cached_store.cache_info()
293321
assert info['cached_keys'] > 0 # Should have cached keys
294322
assert info['current_size'] > 0 # Should have cached data
323+
print(f"Cache contains {info['cached_keys']} keys with {info['current_size']} bytes")
295324
```
296325

297326
This example shows how the CacheStore can significantly reduce access times for repeated

0 commit comments

Comments
 (0)