
Commit d449676

Merge pull request #62 from sanath-2024/main
Make `storage_options` consistent with fsspec to be able to pass local cache directory
2 parents 2dd5887 + 870a67a commit d449676
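
In practical terms, the change makes cache configuration flow through `storage_options` the same way fsspec expects it. Below is a minimal sketch of the new calling convention, assuming `MDIOReader` is importable from the top-level `mdio` package; the bucket path, credentials, and cache directory are placeholders:

from mdio import MDIOReader

# Hypothetical store path and credentials, for illustration only.
reader = MDIOReader(
    "s3://my-bucket/my-file.mdio",
    disk_cache=True,  # prepends fsspec's "simplecache::" protocol to the URL
    storage_options={
        # Options are keyed by protocol name, matching fsspec's convention
        "s3": {"key": "my_key", "secret": "my_secret"},
        # What this PR enables: a user-chosen local cache directory
        "simplecache": {"cache_storage": "/tmp/mdio_cache"},
    },
)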

4 files changed: +66 lines, -15 lines


src/mdio/api/io_utils.py

Lines changed: 48 additions & 14 deletions
@@ -19,12 +19,18 @@ def process_url(
 ) -> FSStore:
     """Check read/write access to FSStore target and return FSStore with double caching.
 
-    It uses a file cache (simplecache protocol from FSSpec) and an in-memory
-    Least Recently Used (LRU) cache implementation from zarr.
+    It can use an in-memory Least Recently Used (LRU) cache implementation from
+    Zarr, and optionally, a file cache (`simplecache` protocol from FSSpec) that
+    is useful for remote stores.
 
     File cache is only valid for remote stores. The LRU caching works
     on both remote and local.
 
+    The `storage_options` argument represents a set of parameters to be passed
+    to the FSSpec backend. Note that the format of `storage_options` is
+    different if `disk_cache` is enabled or disabled, since `disk_cache`
+    internally uses the simplecache protocol.
+
     Args:
         url: FSSpec compliant url
         mode: Toggle for overwriting existing store
@@ -35,21 +41,49 @@ def process_url(
     Returns:
         Store with augmentations like cache, write verification etc.
 
+    Examples:
+        If we want to access an MDIO file from S3 without using disk caching,
+        the simplecache protocol is not used, and therefore we only need to
+        specify the S3 filesystem options:
+
+        >>> from mdio.api.convenience import process_url
+        >>>
+        >>>
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={"key": "my_key", "secret": "my_secret"},
+        ...     memory_cache_size=0,
+        ...     disk_cache=False,
+        ... )
+
+        On the other hand, if we want to use disk caching, we need to
+        explicitly state that the options we are passing are for the S3
+        filesystem:
+
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={"s3": {"key": "my_key", "secret": "my_secret"}},
+        ...     memory_cache_size=0,
+        ...     disk_cache=True,
+        ... )
+
+        This allows us to pass options to the simplecache filesystem as well:
+
+        >>> process_url(
+        ...     url="s3://bucket/key",
+        ...     mode="r",
+        ...     storage_options={
+        ...         "s3": {"key": "my_key", "secret": "my_secret"},
+        ...         "simplecache": {"cache_storage": "custom/local/cache/path"},
+        ...     },
+        ...     memory_cache_size=0,
+        ...     disk_cache=True,
+        ... )
     """
-    # Append simplecache (disk caching) protocol
-    # We need to change the storage options when caching is enabled.
-    # Example below. This allows you to configure the cache protocol as well if needed.
-    # storage_options_before = {'key': 'my_key', 'secret': 'my_secret'}
-    # storage_options_after = {'s3': {'key': 'my_key', 'secret': 'my_secret'},
-    #                          'simplecache': {'cache_storage': '/my/cache/path'}}
     if disk_cache is True:
         url = "::".join(["simplecache", url])
-        if "s3://" in url:
-            storage_options = {"s3": storage_options}
-        elif "gcs://" in url or "gs://" in url:
-            storage_options = {"gcs": storage_options}
-        elif "az://" in url or "abfs://" in url:
-            storage_options = {"abfs": storage_options}
 
     # Strip whitespaces and slashes from end of string
     url = url.rstrip("/ ")
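
The deleted branch is the heart of the change: `process_url` no longer nests a flat `storage_options` dict under the protocol key on the caller's behalf. The caller now passes the nested form directly, which is the same per-protocol keying fsspec uses for chained URLs. For comparison, a short fsspec-only sketch of that convention, with placeholder paths and credentials:

import fsspec

# "simplecache::" chains a local file cache in front of the S3 filesystem;
# keyword arguments are routed to each layer by its protocol name.
with fsspec.open(
    "simplecache::s3://bucket/key",
    s3={"key": "my_key", "secret": "my_secret"},
    simplecache={"cache_storage": "/tmp/fsspec_cache"},
) as f:
    header = f.read(16)  # any read populates the local cache directory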

src/mdio/segy/creation.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ def mdio_spec_to_segy(
 ):
     """Create SEG-Y file without any traces given MDIO specification.
 
-    This function opens a MDIO file, gets some relevant information for SEG-Y files,
+    This function opens an MDIO file, gets some relevant information for SEG-Y files,
     then creates a SEG-Y file with the specification it read from the MDIO file.
 
     It then returns the `MDIOReader` instance, and the parsed floating point format

tests/unit/conftest.py

Lines changed: 10 additions & 0 deletions
@@ -148,3 +148,13 @@ def mock_mdio(
 def mock_reader(mock_mdio: Group) -> MDIOReader:
     """Reader that points to the mocked data to be used later."""
     return MDIOReader(mock_mdio.store.path)
+
+
+@pytest.fixture
+def mock_reader_cached(mock_mdio: Group) -> MDIOReader:
+    """Reader that points to the mocked data to be used later (with local caching)."""
+    return MDIOReader(
+        mock_mdio.store.path,
+        disk_cache=True,
+        storage_options={"simplecache": {"cache_storage": "./mdio_test_cache"}},
+    )

tests/unit/test_accessor.py

Lines changed: 7 additions & 0 deletions
@@ -1,5 +1,7 @@
 """Test for MDIO accessors."""
 
+import os
+import shutil
 
 import numpy as np
 import numpy.testing as npt
@@ -83,6 +85,11 @@ def test_coord_slicing(
         for act_idx, exp_idx in zip(z_indices, z_index):
             npt.assert_array_equal(mock_reader[..., act_idx], mock_data[..., exp_idx])
 
+    def test_local_caching(self, mock_reader_cached):
+        """Test local caching."""
+        assert os.path.isdir("./mdio_test_cache")
+        shutil.rmtree("./mdio_test_cache")
+
 
 class TestExceptions:
     """Test custom exceptions and if they're raised properly."""
