@@ -19,12 +19,18 @@ def process_url(
 ) -> FSStore:
     """Check read/write access to FSStore target and return FSStore with double caching.
 
-    It uses a file cache (simplecache protocol from FSSpec) and an in-memory
-    Least Recently Used (LRU) cache implementation from zarr.
+    It can use an in-memory Least Recently Used (LRU) cache implementation from
+    Zarr and, optionally, a file cache (`simplecache` protocol from FSSpec) that
+    is useful for remote stores.
 
     File cache is only valid for remote stores. The LRU caching works
     on both remote and local.
 
+    The `storage_options` argument is a set of parameters passed to the FSSpec
+    backend. Note that the format of `storage_options` differs depending on
+    whether `disk_cache` is enabled or disabled, since `disk_cache` internally
+    uses the simplecache protocol.
+
     Args:
         url: FSSpec compliant url
         mode: Toggle for overwriting existing store
@@ -35,21 +41,49 @@ def process_url(
     Returns:
         Store with augmentations like cache, write verification etc.
 
+    Examples:
+        If we want to access an MDIO file from S3 without using disk caching,
+        the simplecache protocol is not used, and therefore we only need to
+        specify the s3 filesystem options:
+
+        >>> from mdio.api.convenience import process_url
+        >>>
+        >>>
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={"key": "my_key", "secret": "my_secret"},
+        ...     memory_cache_size=0,
+        ...     disk_cache=False,
+        ... )
+
+        On the other hand, if we want to use disk caching, we need to
+        explicitly state that the options we are passing are for the S3
+        filesystem:
+
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={"s3": {"key": "my_key", "secret": "my_secret"}},
+        ...     memory_cache_size=0,
+        ...     disk_cache=True,
+        ... )
+
+        This allows us to pass options to the simplecache filesystem as well:
+
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={
+        ...         "s3": {"key": "my_key", "secret": "my_secret"},
+        ...         "simplecache": {"cache_storage": "custom/local/cache/path"},
+        ...     },
+        ...     memory_cache_size=0,
+        ...     disk_cache=True,
+        ... )
     """
-    # Append simplecache (disk caching) protocol
-    # We need to change the storage options when caching is enabled.
-    # Example below. This allows you to configure the cache protocol as well if needed.
-    # storage_options_before = {'key': 'my_key', 'secret': 'my_secret'}
-    # storage_options_after = {'s3': {'key': 'my_key', 'secret': 'my_secret'},
-    #                          'simplecache': {'cache_storage': '/my/cache/path'}}
     if disk_cache is True:
         url = "::".join(["simplecache", url])
-        if "s3://" in url:
-            storage_options = {"s3": storage_options}
-        elif "gcs://" in url or "gs://" in url:
-            storage_options = {"gcs": storage_options}
-        elif "az://" in url or "abfs://" in url:
-            storage_options = {"abfs": storage_options}
 
     # Strip whitespaces and slashes from end of string
     url = url.rstrip("/ ")
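As a minimal sketch of the new calling convention (not part of the change itself): with the automatic protocol wrapping removed, the caller nests the backend options under the protocol key whenever disk_cache is enabled. The docstring only shows the s3 form; the GCS url, the anonymous token, and the cache directory below are placeholder values chosen for illustration.

from mdio.api.convenience import process_url

# Hypothetical GCS example mirroring the S3 examples in the docstring above.
# "gcs://bucket/key", token="anon", and the cache path are placeholders.
store = process_url(
    url="gcs://bucket/key",
    mode="r",
    storage_options={
        "gcs": {"token": "anon"},  # options forwarded to the gcsfs backend
        "simplecache": {"cache_storage": "/tmp/mdio_cache"},  # local cache dir
    },
    memory_cache_size=0,
    disk_cache=True,
)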