@@ -4,7 +4,7 @@ This section contains documentation for experimental Zarr Python features. The f
4
4
5
5
## ` CacheStore `
6
6
7
- Zarr Python 3.1.4 adds ` zarr.storage .CacheStore ` provides a dual-store caching implementation
7
+ Zarr Python 3.1.4 adds `zarr.experimental.cache_store.CacheStore`, which provides a dual-store caching implementation
8
8
that can be wrapped around any Zarr store to improve performance for repeated data access.
9
9
This is particularly useful when working with remote stores (e.g., S3, HTTP) where network
10
10
latency can significantly impact data access speed.
@@ -24,15 +24,16 @@ Because the `CacheStore` uses an ordinary Zarr `Store` object as the caching lay
24
24
Creating a CacheStore requires both a source store and a cache store. The cache store
25
25
can be any Store implementation, providing flexibility in cache persistence:
26
26
27
- ``` python
27
+ ``` python exec="true" session="experimental" source="above" result="ansi"
28
28
import zarr
29
29
import zarr.storage
30
30
import numpy as np
31
+ from zarr.experimental.cache_store import CacheStore
31
32
32
33
# Create a local store and a separate cache store
33
34
source_store = zarr.storage.LocalStore(' test.zarr' )
34
35
cache_store = zarr.storage.MemoryStore() # In-memory cache
35
- cached_store = zarr.storage. CacheStore(
36
+ cached_store = CacheStore(
36
37
store = source_store,
37
38
cache_store = cache_store,
38
39
max_size = 256 * 1024 * 1024 # 256MB cache
@@ -52,7 +53,7 @@ such as a remote store for source data and a local store for persistent caching.
52
53
53
54
The CacheStore provides significant performance improvements for repeated data access:
54
55
55
- ``` python
56
+ ``` python exec="true" session="experimental" source="above" result="ansi"
56
57
import time
57
58
58
59
# Benchmark reading with cache
@@ -80,23 +81,34 @@ The CacheStore is most beneficial when used with remote stores where network lat
80
81
is a significant factor. You can use different store types for source and cache:
81
82
82
83
``` python
83
- from zarr.storage import FsspecStore, LocalStore
84
-
85
- # Create a remote store (S3 example) - for demonstration only
86
- remote_store = FsspecStore.from_url(' s3://bucket/data.zarr' , storage_options = {' anon' : True })
87
-
88
- # Use a local store for persistent caching
89
- local_cache_store = LocalStore(' cache_data' )
90
-
91
- # Create cached store with persistent local cache
92
- cached_store = zarr.storage.CacheStore(
93
- store = remote_store,
94
- cache_store = local_cache_store,
84
+ # This example shows remote store setup but requires network access
85
+ # from zarr.storage import FsspecStore, LocalStore
86
+
87
+ # # Create a remote store (S3 example) - for demonstration only
88
+ # remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
89
+
90
+ # # Use a local store for persistent caching
91
+ # local_cache_store = LocalStore('cache_data')
92
+
93
+ # # Create cached store with persistent local cache
94
+ # cached_store = CacheStore(
95
+ # store=remote_store,
96
+ # cache_store=local_cache_store,
97
+ # max_size=512*1024*1024 # 512MB cache
98
+ # )
99
+
100
+ # # Open array through cached store
101
+ # z = zarr.open(cached_store)
102
+
103
+ # For demonstration, use local stores instead
104
+ from zarr.storage import LocalStore
105
+ local_source = LocalStore(' remote_data.zarr' )
106
+ local_cache = LocalStore(' cache_data' )
107
+ cached_store = CacheStore(
108
+ store = local_source,
109
+ cache_store = local_cache,
95
110
max_size = 512 * 1024 * 1024 # 512MB cache
96
111
)
97
-
98
- # Open array through cached store
99
- z = zarr.open(cached_store)
100
112
```
101
113
102
114
The first access to any chunk will be slow (network retrieval), but subsequent accesses
@@ -109,16 +121,16 @@ The CacheStore can be configured with several parameters:
109
121
110
122
**max_size**: Controls the maximum size of cached data in bytes
111
123
112
- ``` python
124
+ ``` python exec="true" session="experimental" source="above" result="ansi"
113
125
# 256MB cache with size limit
114
- cache = zarr.storage. CacheStore(
126
+ cache = CacheStore(
115
127
store = source_store,
116
128
cache_store = cache_store,
117
129
max_size = 256 * 1024 * 1024
118
130
)
119
131
120
132
# Unlimited cache size (use with caution)
121
- cache = zarr.storage. CacheStore(
133
+ cache = CacheStore(
122
134
store = source_store,
123
135
cache_store = cache_store,
124
136
max_size = None
@@ -127,16 +139,16 @@ cache = zarr.storage.CacheStore(
127
139
128
140
**max_age_seconds**: Controls time-based cache expiration
129
141
130
- ``` python
142
+ ``` python exec="true" session="experimental" source="above" result="ansi"
131
143
# Cache expires after 1 hour
132
- cache = zarr.storage. CacheStore(
144
+ cache = CacheStore(
133
145
store = source_store,
134
146
cache_store = cache_store,
135
147
max_age_seconds = 3600
136
148
)
137
149
138
150
# Cache never expires
139
- cache = zarr.storage. CacheStore(
151
+ cache = CacheStore(
140
152
store = source_store,
141
153
cache_store = cache_store,
142
154
max_age_seconds = " infinity"
@@ -145,16 +157,16 @@ cache = zarr.storage.CacheStore(
145
157
146
158
**cache_set_data**: Controls whether written data is cached
147
159
148
- ``` python
160
+ ``` python exec="true" session="experimental" source="above" result="ansi"
149
161
# Cache data when writing (default)
150
- cache = zarr.storage. CacheStore(
162
+ cache = CacheStore(
151
163
store = source_store,
152
164
cache_store = cache_store,
153
165
cache_set_data = True
154
166
)
155
167
156
168
# Don't cache written data (read-only cache)
157
- cache = zarr.storage. CacheStore(
169
+ cache = CacheStore(
158
170
store = source_store,
159
171
cache_store = cache_store,
160
172
cache_set_data = False
@@ -165,7 +177,7 @@ cache = zarr.storage.CacheStore(
165
177
166
178
The CacheStore provides statistics to monitor cache performance and state:
167
179
168
- ``` python
180
+ ``` python exec="true" session="experimental" source="above" result="ansi"
169
181
# Access some data to generate cache activity
170
182
data = zarr_array[0 :50 , 0 :50 ] # First access - cache miss
171
183
data = zarr_array[0 :50 , 0 :50 ] # Second access - cache hit
@@ -187,7 +199,7 @@ The `cache_info()` method returns a dictionary with detailed information about t
187
199
188
200
The CacheStore provides methods for manual cache management:
189
201
190
- ``` python
202
+ ``` python exec="true" session="experimental" source="above" result="ansi"
191
203
# Clear all cached data and tracking information
192
204
import asyncio
193
205
asyncio.run(cached_store.clear_cache())
@@ -217,11 +229,12 @@ and use any store type for the cache backend:
217
229
218
230
### Local Store with Memory Cache
219
231
220
- ``` python
232
+ ``` python exec="true" session="experimental-memory-cache" source="above" result="ansi"
221
233
from zarr.storage import LocalStore, MemoryStore
234
+ from zarr.experimental.cache_store import CacheStore
222
235
source_store = LocalStore(' data.zarr' )
223
236
cache_store = MemoryStore()
224
- cached_store = zarr.storage. CacheStore(
237
+ cached_store = CacheStore(
225
238
store = source_store,
226
239
cache_store = cache_store,
227
240
max_size = 128 * 1024 * 1024
@@ -230,12 +243,25 @@ cached_store = zarr.storage.CacheStore(
230
243
231
244
### Remote Store with Local Cache
232
245
233
- ``` python
234
- from zarr.storage import FsspecStore, LocalStore
235
- remote_store = FsspecStore.from_url(' s3://bucket/data.zarr' , storage_options = {' anon' : True })
246
+ ``` python exec="true" session="experimental-remote-cache" source="above" result="ansi"
247
+ # Remote store example (commented out as it requires network access)
248
+ # from zarr.storage import FsspecStore, LocalStore
249
+ # remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
250
+ # local_cache = LocalStore('local_cache')
251
+ # cached_store = CacheStore(
252
+ # store=remote_store,
253
+ # cache_store=local_cache,
254
+ # max_size=1024*1024*1024,
255
+ # max_age_seconds=3600
256
+ # )
257
+
258
+ # Local store example for demonstration
259
+ from zarr.storage import LocalStore
260
+ from zarr.experimental.cache_store import CacheStore
261
+ remote_like_store = LocalStore(' remote_like_data.zarr' )
236
262
local_cache = LocalStore(' local_cache' )
237
- cached_store = zarr.storage. CacheStore(
238
- store = remote_store ,
263
+ cached_store = CacheStore(
264
+ store = remote_like_store ,
239
265
cache_store = local_cache,
240
266
max_size = 1024 * 1024 * 1024 ,
241
267
max_age_seconds = 3600
@@ -244,11 +270,12 @@ cached_store = zarr.storage.CacheStore(
244
270
245
271
### Memory Store with Persistent Cache
246
272
247
- ``` python
273
+ ``` python exec="true" session="experimental-local-cache" source="above" result="ansi"
248
274
from zarr.storage import MemoryStore, LocalStore
275
+ from zarr.experimental.cache_store import CacheStore
249
276
memory_store = MemoryStore()
250
277
persistent_cache = LocalStore(' persistent_cache' )
251
- cached_store = zarr.storage. CacheStore(
278
+ cached_store = CacheStore(
252
279
store = memory_store,
253
280
cache_store = persistent_cache,
254
281
max_size = 256 * 1024 * 1024
@@ -262,16 +289,17 @@ of source and cache stores for your specific use case.
262
289
263
290
Here's a complete example demonstrating cache effectiveness:
264
291
265
- ``` python
292
+ ``` python exec="true" session="experimental-final" source="above" result="ansi"
266
293
import zarr
267
294
import zarr.storage
268
295
import time
269
296
import numpy as np
297
+ from zarr.experimental.cache_store import CacheStore
270
298
271
299
# Create test data with dual-store cache
272
300
source_store = zarr.storage.LocalStore(' benchmark.zarr' )
273
301
cache_store = zarr.storage.MemoryStore()
274
- cached_store = zarr.storage. CacheStore(
302
+ cached_store = CacheStore(
275
303
store = source_store,
276
304
cache_store = cache_store,
277
305
max_size = 256 * 1024 * 1024
@@ -292,6 +320,7 @@ second_access = time.time() - start
292
320
info = cached_store.cache_info()
293
321
assert info[' cached_keys' ] > 0 # Should have cached keys
294
322
assert info[' current_size' ] > 0 # Should have cached data
323
+ print (f " Cache contains { info[' cached_keys' ]} keys with { info[' current_size' ]} bytes " )
295
324
```
296
325
297
326
This example shows how the CacheStore can significantly reduce access times for repeated
0 commit comments