@@ -4,7 +4,7 @@ This section contains documentation for experimental Zarr Python features. The f
44
55## ` CacheStore `
66
7- Zarr Python 3.1.4 adds ` zarr.storage .CacheStore ` provides a dual-store caching implementation
7+ Zarr Python 3.1.4 adds `zarr.experimental.cache_store.CacheStore`, which provides a dual-store caching implementation
88that can be wrapped around any Zarr store to improve performance for repeated data access.
99This is particularly useful when working with remote stores (e.g., S3, HTTP) where network
1010latency can significantly impact data access speed.
@@ -24,15 +24,16 @@ Because the `CacheStore` uses an ordinary Zarr `Store` object as the caching lay
2424Creating a CacheStore requires both a source store and a cache store. The cache store
2525can be any Store implementation, providing flexibility in cache persistence:
2626
27- ``` python
27+ ``` python exec="true" session="experimental" source="above" result="ansi"
2828import zarr
2929import zarr.storage
3030import numpy as np
31+ from zarr.experimental.cache_store import CacheStore
3132
3233# Create a local store and a separate cache store
3334source_store = zarr.storage.LocalStore(' test.zarr' )
3435cache_store = zarr.storage.MemoryStore() # In-memory cache
35- cached_store = zarr.storage. CacheStore(
36+ cached_store = CacheStore(
3637 store = source_store,
3738 cache_store = cache_store,
3839 max_size = 256 * 1024 * 1024 # 256MB cache
@@ -52,7 +53,7 @@ such as a remote store for source data and a local store for persistent caching.
5253
5354The CacheStore provides significant performance improvements for repeated data access:
5455
55- ``` python
56+ ``` python exec="true" session="experimental" source="above" result="ansi"
5657import time
5758
5859# Benchmark reading with cache
@@ -80,23 +81,34 @@ The CacheStore is most beneficial when used with remote stores where network lat
8081is a significant factor. You can use different store types for source and cache:
8182
8283``` python
83- from zarr.storage import FsspecStore, LocalStore
84-
85- # Create a remote store (S3 example) - for demonstration only
86- remote_store = FsspecStore.from_url(' s3://bucket/data.zarr' , storage_options = {' anon' : True })
87-
88- # Use a local store for persistent caching
89- local_cache_store = LocalStore(' cache_data' )
90-
91- # Create cached store with persistent local cache
92- cached_store = zarr.storage.CacheStore(
93- store = remote_store,
94- cache_store = local_cache_store,
84+ # This example shows remote store setup but requires network access
85+ # from zarr.storage import FsspecStore, LocalStore
86+
87+ # # Create a remote store (S3 example) - for demonstration only
88+ # remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
89+
90+ # # Use a local store for persistent caching
91+ # local_cache_store = LocalStore('cache_data')
92+
93+ # # Create cached store with persistent local cache
94+ # cached_store = CacheStore(
95+ # store=remote_store,
96+ # cache_store=local_cache_store,
97+ # max_size=512*1024*1024 # 512MB cache
98+ # )
99+
100+ # # Open array through cached store
101+ # z = zarr.open(cached_store)
102+
103+ # For demonstration, use local stores instead
104+ from zarr.storage import LocalStore
105+ local_source = LocalStore(' remote_data.zarr' )
106+ local_cache = LocalStore(' cache_data' )
107+ cached_store = CacheStore(
108+ store = local_source,
109+ cache_store = local_cache,
95110 max_size = 512 * 1024 * 1024 # 512MB cache
96111)
97-
98- # Open array through cached store
99- z = zarr.open(cached_store)
100112```
101113
102114The first access to any chunk will be slow (network retrieval), but subsequent accesses
@@ -109,16 +121,16 @@ The CacheStore can be configured with several parameters:
109121
110122** max_size** : Controls the maximum size of cached data in bytes
111123
112- ``` python
124+ ``` python exec="true" session="experimental" source="above" result="ansi"
113125# 256MB cache with size limit
114- cache = zarr.storage. CacheStore(
126+ cache = CacheStore(
115127 store = source_store,
116128 cache_store = cache_store,
117129 max_size = 256 * 1024 * 1024
118130)
119131
120132# Unlimited cache size (use with caution)
121- cache = zarr.storage. CacheStore(
133+ cache = CacheStore(
122134 store = source_store,
123135 cache_store = cache_store,
124136 max_size = None
@@ -127,16 +139,16 @@ cache = zarr.storage.CacheStore(
127139
128140** max_age_seconds** : Controls time-based cache expiration
129141
130- ``` python
142+ ``` python exec="true" session="experimental" source="above" result="ansi"
131143# Cache expires after 1 hour
132- cache = zarr.storage. CacheStore(
144+ cache = CacheStore(
133145 store = source_store,
134146 cache_store = cache_store,
135147 max_age_seconds = 3600
136148)
137149
138150# Cache never expires
139- cache = zarr.storage. CacheStore(
151+ cache = CacheStore(
140152 store = source_store,
141153 cache_store = cache_store,
142154 max_age_seconds = " infinity"
@@ -145,16 +157,16 @@ cache = zarr.storage.CacheStore(
145157
146158** cache_set_data** : Controls whether written data is cached
147159
148- ``` python
160+ ``` python exec="true" session="experimental" source="above" result="ansi"
149161# Cache data when writing (default)
150- cache = zarr.storage. CacheStore(
162+ cache = CacheStore(
151163 store = source_store,
152164 cache_store = cache_store,
153165 cache_set_data = True
154166)
155167
156168# Don't cache written data (read-only cache)
157- cache = zarr.storage. CacheStore(
169+ cache = CacheStore(
158170 store = source_store,
159171 cache_store = cache_store,
160172 cache_set_data = False
@@ -165,7 +177,7 @@ cache = zarr.storage.CacheStore(
165177
166178The CacheStore provides statistics to monitor cache performance and state:
167179
168- ``` python
180+ ``` python exec="true" session="experimental" source="above" result="ansi"
169181# Access some data to generate cache activity
170182data = zarr_array[0 :50 , 0 :50 ] # First access - cache miss
171183data = zarr_array[0 :50 , 0 :50 ] # Second access - cache hit
@@ -187,7 +199,7 @@ The `cache_info()` method returns a dictionary with detailed information about t
187199
188200The CacheStore provides methods for manual cache management:
189201
190- ``` python
202+ ``` python exec="true" session="experimental" source="above" result="ansi"
191203# Clear all cached data and tracking information
192204import asyncio
193205asyncio.run(cached_store.clear_cache())
@@ -217,11 +229,12 @@ and use any store type for the cache backend:
217229
218230### Local Store with Memory Cache
219231
220- ``` python
232+ ``` python exec="true" session="experimental-memory-cache" source="above" result="ansi"
221233from zarr.storage import LocalStore, MemoryStore
234+ from zarr.experimental.cache_store import CacheStore
222235source_store = LocalStore(' data.zarr' )
223236cache_store = MemoryStore()
224- cached_store = zarr.storage. CacheStore(
237+ cached_store = CacheStore(
225238 store = source_store,
226239 cache_store = cache_store,
227240 max_size = 128 * 1024 * 1024
@@ -230,12 +243,25 @@ cached_store = zarr.storage.CacheStore(
230243
231244### Remote Store with Local Cache
232245
233- ``` python
234- from zarr.storage import FsspecStore, LocalStore
235- remote_store = FsspecStore.from_url(' s3://bucket/data.zarr' , storage_options = {' anon' : True })
246+ ``` python exec="true" session="experimental-remote-cache" source="above" result="ansi"
247+ # Remote store example (commented out as it requires network access)
248+ # from zarr.storage import FsspecStore, LocalStore
249+ # remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
250+ # local_cache = LocalStore('local_cache')
251+ # cached_store = CacheStore(
252+ # store=remote_store,
253+ # cache_store=local_cache,
254+ # max_size=1024*1024*1024,
255+ # max_age_seconds=3600
256+ # )
257+
258+ # Local store example for demonstration
259+ from zarr.storage import LocalStore
260+ from zarr.experimental.cache_store import CacheStore
261+ remote_like_store = LocalStore(' remote_like_data.zarr' )
236262local_cache = LocalStore(' local_cache' )
237- cached_store = zarr.storage. CacheStore(
238- store = remote_store ,
263+ cached_store = CacheStore(
264+ store = remote_like_store ,
239265 cache_store = local_cache,
240266 max_size = 1024 * 1024 * 1024 ,
241267 max_age_seconds = 3600
@@ -244,11 +270,12 @@ cached_store = zarr.storage.CacheStore(
244270
245271### Memory Store with Persistent Cache
246272
247- ``` python
273+ ``` python exec="true" session="experimental-local-cache" source="above" result="ansi"
248274from zarr.storage import MemoryStore, LocalStore
275+ from zarr.experimental.cache_store import CacheStore
249276memory_store = MemoryStore()
250277persistent_cache = LocalStore(' persistent_cache' )
251- cached_store = zarr.storage. CacheStore(
278+ cached_store = CacheStore(
252279 store = memory_store,
253280 cache_store = persistent_cache,
254281 max_size = 256 * 1024 * 1024
@@ -262,16 +289,17 @@ of source and cache stores for your specific use case.
262289
263290Here's a complete example demonstrating cache effectiveness:
264291
265- ``` python
292+ ``` python exec="true" session="experimental-final" source="above" result="ansi"
266293import zarr
267294import zarr.storage
268295import time
269296import numpy as np
297+ from zarr.experimental.cache_store import CacheStore
270298
271299# Create test data with dual-store cache
272300source_store = zarr.storage.LocalStore(' benchmark.zarr' )
273301cache_store = zarr.storage.MemoryStore()
274- cached_store = zarr.storage. CacheStore(
302+ cached_store = CacheStore(
275303 store = source_store,
276304 cache_store = cache_store,
277305 max_size = 256 * 1024 * 1024
@@ -292,6 +320,7 @@ second_access = time.time() - start
292320info = cached_store.cache_info()
293321assert info[' cached_keys' ] > 0 # Should have cached keys
294322assert info[' current_size' ] > 0 # Should have cached data
323+ print (f " Cache contains { info[' cached_keys' ]} keys with { info[' current_size' ]} bytes " )
295324```
296325
297326This example shows how the CacheStore can significantly reduce access times for repeated
0 commit comments