@@ -36,8 +36,8 @@ can be any Store implementation, providing flexibility in cache persistence:
36
36
>>> source_store = zarr.storage.LocalStore('test.zarr')
37
37
>>> cache_store = zarr.storage.MemoryStore() # In-memory cache
38
38
>>> cached_store = zarr.storage.CacheStore(
39
- ... store=source_store,
40
- ... cache_store=cache_store,
39
+ ... store=source_store,
40
+ ... cache_store=cache_store,
41
41
... max_size=256 * 1024 * 1024 # 256MB cache
42
42
... )
43
43
>>>
@@ -83,21 +83,21 @@ is a significant factor. You can use different store types for source and cache:
83
83
84
84
>>> from zarr.storage import FsspecStore, LocalStore
85
85
>>>
86
- >>> # Create a remote store (S3 example)
87
- >>> remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
88
- >>>
89
- >>> # Use a local store for persistent caching
90
- >>> local_cache_store = LocalStore('cache_data')
91
- >>>
86
+ >>> # Create a remote store (S3 example) - for demonstration only
87
+ >>> remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True}) # doctest: +SKIP
88
+ >>>
89
+ >>> # Use a local store for persistent caching
90
+ >>> local_cache_store = LocalStore('cache_data') # doctest: +SKIP
91
+ >>>
92
92
>>> # Create cached store with persistent local cache
93
- >>> cached_store = zarr.storage.CacheStore(
93
+ >>> cached_store = zarr.storage.CacheStore( # doctest: +SKIP
94
94
... store=remote_store,
95
95
... cache_store=local_cache_store,
96
96
... max_size=512 * 1024 * 1024 # 512MB cache
97
97
... )
98
98
>>>
99
- >>> # Open array through cached store
100
- >>> z = zarr.open(cached_store)
99
+ >>> # Open array through cached store
100
+ >>> z = zarr.open(cached_store) # doctest: +SKIP
101
101
102
102
The first access to any chunk will be slow (network retrieval), but subsequent accesses
103
103
to the same chunk will be served from the local cache, providing dramatic speedup.
@@ -177,7 +177,7 @@ The CacheStore provides statistics to monitor cache performance and state:
177
177
True
178
178
>>> info['tracked_keys'] >= 0
179
179
True
180
- >>> info['cached_keys'] >= 0
180
+ >>> info['cached_keys'] >= 0
181
181
True
182
182
>>> isinstance(info['cache_set_data'], bool)
183
183
True
@@ -193,14 +193,14 @@ The CacheStore provides methods for manual cache management:
193
193
>>> import asyncio
194
194
>>> asyncio.run(cached_store.clear_cache()) # doctest: +SKIP
195
195
>>>
196
- >>> # Check cache info after clearing
196
+ >>> # Check cache info after clearing
197
197
>>> info = cached_store.cache_info() # doctest: +SKIP
198
198
>>> info['tracked_keys'] == 0 # doctest: +SKIP
199
199
True
200
- >>> info['current_size'] == 0 # doctest: +SKIP
200
+ >>> info['current_size'] == 0 # doctest: +SKIP
201
201
True
202
202
203
- The `clear_cache()` method is an async method that clears both the cache store
203
+ The `clear_cache()` method is an async method that clears both the cache store
204
204
(if it supports the `clear` method) and all internal tracking data.
205
205
206
206
Best Practices
@@ -235,9 +235,9 @@ Remote Store with Local Cache
235
235
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
236
236
237
237
>>> from zarr.storage import FsspecStore, LocalStore
238
- >>> remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True})
239
- >>> local_cache = LocalStore('local_cache')
240
- >>> cached_store = zarr.storage.CacheStore(
238
+ >>> remote_store = FsspecStore.from_url('s3://bucket/data.zarr', storage_options={'anon': True}) # doctest: +SKIP
239
+ >>> local_cache = LocalStore('local_cache') # doctest: +SKIP
240
+ >>> cached_store = zarr.storage.CacheStore( # doctest: +SKIP
241
241
... store=remote_store,
242
242
... cache_store=local_cache,
243
243
... max_size=1024 * 1024 * 1024,
@@ -286,7 +286,7 @@ Here's a complete example demonstrating cache effectiveness:
286
286
>>> first_access = time.time() - start
287
287
>>>
288
288
>>> start = time.time()
289
- >>> data = zarr_array[20:30, 20:30] # Second access (cache hit)
289
+ >>> data = zarr_array[20:30, 20:30] # Second access (cache hit)
290
290
>>> second_access = time.time() - start
291
291
>>>
292
292
>>> # Check cache statistics
0 commit comments