Skip to content

Commit e3b20e1

Browse files
authored
Add ObstoreMemCacheReader (#2)
1 parent 54bc685 commit e3b20e1

File tree

3 files changed

+84
-3
lines changed

3 files changed

+84
-3
lines changed

src/obspec_utils/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from ._version import __version__
2-
from .file_handlers import ObstoreReader
2+
from .file_handlers import ObstoreMemCacheReader, ObstoreReader
33

4-
__all__ = ["__version__", "ObstoreReader"]
4+
__all__ = ["__version__", "ObstoreMemCacheReader", "ObstoreReader"]

src/obspec_utils/file_handlers.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from obstore import ReadableFile
99
from obstore.store import ObjectStore
1010

11+
from obstore.store import MemoryStore
12+
1113

1214
class ObstoreReader:
1315
_reader: ReadableFile
@@ -40,3 +42,30 @@ def seek(self, offset: int, whence: int = 0, /):
4042

4143
def tell(self) -> int:
4244
return self._reader.tell()
45+
46+
47+
class ObstoreMemCacheReader(ObstoreReader):
    """
    File reader that serves reads from an in-memory copy of a stored object.

    On construction the object at ``path`` is fetched in full from the given
    store and written into a private MemoryStore; the underlying reader is
    then opened against that MemoryStore rather than the original store.
    """

    # Reader opened on the in-memory copy.
    _reader: ReadableFile
    # Private store holding the cached bytes of the file.
    _memstore: MemoryStore

    def __init__(self, store: ObjectStore, path: str) -> None:
        """
        Copy ``path`` from ``store`` into memory and open a reader on the copy.

        The whole file is loaded up front. This can pay off when the file is
        read several times, or when repeated network requests to the original
        store should be avoided.

        Parameters
        ----------
        store
            [ObjectStore][obstore.store.ObjectStore] for reading the file.
        path
            The path to the file within the store. This should not include the prefix.
        """
        self._memstore = MemoryStore()
        # Fetch the full object once and mirror it under the same path.
        self._memstore.put(path, store.get(path).bytes())
        # All later reads go to the in-memory copy, not to ``store``.
        self._reader = obs.open_reader(self._memstore, path)

tests/test_xarray.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import xarray as xr
2-
from obspec_utils import ObstoreReader
2+
from obspec_utils import ObstoreMemCacheReader, ObstoreReader
33
from obstore.store import LocalStore
44

55

@@ -8,3 +8,55 @@ def test_local_reader(local_netcdf4_file) -> None:
88
reader = ObstoreReader(store=LocalStore(), path=local_netcdf4_file)
99
ds_obstore = xr.open_dataset(reader, engine="h5netcdf")
1010
xr.testing.assert_allclose(ds_fsspec, ds_obstore)
11+
12+
13+
def test_memcache_reader(local_netcdf4_file) -> None:
    """Open a dataset through ObstoreMemCacheReader and compare it with a direct open."""
    # Reference dataset opened straight from the local path.
    expected = xr.open_dataset(local_netcdf4_file, engine="h5netcdf")

    # Same file, routed through the in-memory caching reader.
    cached_reader = ObstoreMemCacheReader(
        store=LocalStore(),
        path=local_netcdf4_file,
    )
    actual = xr.open_dataset(cached_reader, engine="h5netcdf")

    xr.testing.assert_allclose(expected, actual)
19+
20+
21+
def test_memcache_reader_interface(local_netcdf4_file) -> None:
    """Check that ``readall`` on ObstoreMemCacheReader matches ObstoreReader."""
    local_store = LocalStore()
    plain = ObstoreReader(store=local_store, path=local_netcdf4_file)
    cached = ObstoreMemCacheReader(store=local_store, path=local_netcdf4_file)

    # Both readers must return the same full contents, as plain bytes.
    plain_bytes = plain.readall()
    cached_bytes = cached.readall()
    assert plain_bytes == cached_bytes
    assert isinstance(cached_bytes, bytes)
32+
33+
34+
def test_memcache_reader_multiple_reads(local_netcdf4_file) -> None:
    """Exercise sequential ``read``, ``tell`` and ``seek`` on ObstoreMemCacheReader."""
    cached = ObstoreMemCacheReader(store=LocalStore(), path=local_netcdf4_file)

    # First 100-byte read.
    first = cached.read(100)
    assert len(first) == 100
    assert isinstance(first, bytes)

    # Second 100-byte read continues where the first stopped.
    second = cached.read(100)
    assert len(second) == 100
    assert isinstance(second, bytes)

    # Consecutive chunks come from different parts of the file.
    assert first != second

    # Two 100-byte reads leave the cursor at offset 200.
    offset = cached.tell()
    assert offset == 200

    # Rewinding puts the cursor back at the start.
    cached.seek(0)
    assert cached.tell() == 0

    # Reading again after the rewind reproduces the first chunk.
    first_again = cached.read(100)
    assert first == first_again

0 commit comments

Comments
 (0)