Skip to content

Commit ccacc17

Browse files
committed
fix: implement cache touch in board, fix rsc cache
1 parent 9ccfafe commit ccacc17

File tree

6 files changed

+142
-22
lines changed

6 files changed

+142
-22
lines changed

pins/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
# Imports ----
1212
from .boards import BaseBoard
1313
from .constructors import *
14+
from .cache import PinsCache, PinsUrlCache

pins/boards.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,9 +148,14 @@ def pin_meta(self, name, version: str = None) -> Meta:
148148
components = [pin_name, selected_version.version]
149149
meta_name = self.meta_factory.get_meta_name(*components)
150150

151-
path_version = self.construct_path([*components, meta_name])
152-
f = self.fs.open(path_version)
153-
return self.meta_factory.read_pin_yaml(f, pin_name, selected_version)
151+
path_meta = self.construct_path([*components, meta_name])
152+
f = self.fs.open(path_meta)
153+
154+
meta = self.meta_factory.read_pin_yaml(f, pin_name, selected_version)
155+
156+
self._touch_cache(path_meta)
157+
158+
return meta
154159

155160
def pin_list(self):
156161
"""List names of all pins in a board.
@@ -561,6 +566,23 @@ def _extract_search_meta(self, meta):
561566
d["meta"] = meta
562567
return d
563568

569+
def _get_cache_path(self, pin_name, version):
    """Return the local cache location for one version of a pin.

    Parameters
    ----------
    pin_name:
        Name of the pin; resolved to a board path via ``self.path_to_pin``.
    version:
        Version identifier, used as the final path component.

    Returns
    -------
    str
        The last entry of ``self.fs.storage`` joined with the name that
        ``self.fs.hash_name`` (called with ``same_name=True``) gives to the
        version's path.
    """
    version_path = self.construct_path([self.path_to_pin(pin_name), version])

    # named cache_name (not ``hash``) so the builtin is not shadowed
    cache_name = self.fs.hash_name(version_path, True)
    return str(Path(self.fs.storage[-1]) / cache_name)
573+
574+
def _touch_cache(self, path):
575+
from pins.cache import touch_access_time
576+
577+
# TODO: assumes same_name set to True. Let's require this be set to
578+
# instantiate a pins cache.
579+
if not hasattr(self.fs, "cached_files"):
580+
return
581+
582+
hash = self.fs.hash_name(path, True)
583+
path_to_hashed = Path(self.fs.storage[-1]) / hash
584+
return touch_access_time(path_to_hashed)
585+
564586

565587
class BoardManual(BaseBoard):
566588
"""Simple board that accepts a dictionary of form pin_name: path.
@@ -614,7 +636,9 @@ def pin_meta(self, name, version=None):
614636

615637
path_meta = self.construct_path([pin_name, meta_name])
616638
f = self.fs.open(path_meta)
617-
return self.meta_factory.read_pin_yaml(f, pin_name, VersionRaw(""))
639+
meta = self.meta_factory.read_pin_yaml(f, pin_name, VersionRaw(""))
640+
641+
return meta
618642

619643
def pin_download(self, name, version=None, hash=None) -> Sequence[str]:
620644
meta = self.pin_meta(name, version)

pins/cache.py

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
PLACEHOLDER_FILE = "file"
1717

1818

19-
def touch_access_time(path, access_time: "float | None" = None):
19+
def touch_access_time(path, access_time: "float | None" = None, strict=True):
2020
"""Update access time of file.
2121
2222
Returns the new access time.
@@ -27,7 +27,7 @@ def touch_access_time(path, access_time: "float | None" = None):
2727

2828
p = Path(path)
2929

30-
if not p.exists():
30+
if not p.exists() and not strict:
3131
p.touch()
3232

3333
stat = p.stat()
@@ -64,6 +64,7 @@ def _make_local_details(self, path):
6464
# note that this is called in ._open(), at the point it's known the file
6565
# will be cached
6666
fn = super()._make_local_details(path)
67+
print(f"cache file: {fn}")
6768
Path(fn).parent.mkdir(parents=True, exist_ok=True)
6869

6970
return fn
@@ -72,7 +73,7 @@ def hash_name(self, path, same_name):
7273
# the main change in this function is that, for same_name, it returns
7374
# the full path
7475
if same_name:
75-
if self.hash_prefix:
76+
if self.hash_prefix is not None:
7677
# optionally make the name relative to a parent path
7778
# using the hash of parent path as a prefix, to flatten a bit
7879
suffix = Path(path).relative_to(Path(self.hash_prefix))
@@ -87,10 +88,37 @@ def hash_name(self, path, same_name):
8788

8889
return path
8990
else:
90-
return hash_name(path, same_name)
91+
raise NotImplementedError()
9192

92-
def touch_access_time(path):
93-
return touch_access_time(path)
93+
94+
class PinsRscCache(PinsCache):
    """Modifies the PinsCache to allow hash_prefix to be an RSC server url.

    Note that this class also modifies the first / in a path to be a +, so that
    pin contents will not be put into subdirectories, for e.g. michael/mtcars/data.txt.
    """

    protocol = "pinsrsccache"

    def hash_name(self, path, same_name):
        """Return the cache-relative name used to store ``path``.

        The result has the form ``<protocol>_<prefix>/<user>+<rest of path>``,
        where ``<protocol>`` comes from ``protocol_to_string(self.fs.protocol)``
        and ``<prefix>`` is the module-level ``hash_name`` of
        ``self.hash_prefix``.

        Raises
        ------
        NotImplementedError
            If ``same_name`` is false, or if ``self.hash_prefix`` is None.
        """
        if not same_name:
            raise NotImplementedError("PinsRscCache only supports same_name=True")

        if self.hash_prefix is None:
            raise NotImplementedError("PinsRscCache requires hash_prefix to be set")

        # change pin path of form <user>/<content> to <user>+<content>
        suffix = path.replace("/", "+", 1)
        prefix = hash_name(self.hash_prefix, False)

        # TODO: hacky to automatically tack on protocol here
        # but this is what R pins boards do. Could make a bool arg?
        proto_name = protocol_to_string(self.fs.protocol)
        full_prefix = "_".join([proto_name, prefix])
        return str(Path(full_prefix) / suffix)
94122

95123

96124
class PinsUrlCache(PinsCache):

pins/constructors.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import tempfile
44

55
from .boards import BaseBoard, BoardRsConnect, BoardManual
6-
from .cache import PinsCache, PinsUrlCache
6+
from .cache import PinsCache, PinsUrlCache, PinsRscCache
77
from .config import get_data_dir, get_cache_dir
88

99

@@ -58,6 +58,8 @@ def board(
5858
if storage_options is None:
5959
storage_options = {}
6060

61+
# TODO: at this point should just manually construct the rsc board directly
62+
# from board_rsconnect...
6163
if protocol == "rsc":
6264
# TODO: register RsConnectFs with fsspec
6365
from pins.rsconnect.fs import RsConnectFs
@@ -71,9 +73,17 @@ def board(
7173

7274
if cache is DEFAULT:
7375
cache_dir = get_cache_dir()
74-
fs = PinsCache(
75-
cache_storage=cache_dir, fs=fs, hash_prefix=path, same_names=True
76-
)
76+
77+
# manually create a subdirectory for rsc server
78+
if protocol == "rsc":
79+
hash_prefix = storage_options["server_url"]
80+
fs = PinsRscCache(
81+
cache_storage=cache_dir, fs=fs, hash_prefix=hash_prefix, same_names=True
82+
)
83+
else:
84+
fs = PinsCache(
85+
cache_storage=cache_dir, fs=fs, hash_prefix=path, same_names=True
86+
)
7787
elif cache is None:
7888
pass
7989
else:
@@ -272,7 +282,9 @@ def board_rsconnect(
272282
server_url = os.environ.get("CONNECT_SERVER")
273283

274284
kwargs = dict(server_url=server_url, api_key=api_key)
275-
return board("rsc", "", versioned, cache, allow_pickle_read, storage_options=kwargs)
285+
return board(
286+
"rsc", None, versioned, cache, allow_pickle_read, storage_options=kwargs
287+
)
276288

277289

278290
def board_s3(path, versioned=True, cache=DEFAULT, allow_pickle_read=None):

pins/tests/test_boards.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from datetime import datetime, timedelta
1313
from time import sleep
14+
from pathlib import Path
1415

1516
# using pytest cases, so that we can pass in fixtures as parameters
1617
# TODO: this seems like maybe overkill
@@ -288,6 +289,36 @@ def test_board_pin_search_name(board, df, search, matches):
288289
assert sorted_meta_names == sorted(matches)
289290

290291

292+
# BaseBoard specific ==========================================================
293+
294+
from pins.boards import BaseBoard # noqa
295+
from pins.cache import PinsCache # noqa
296+
297+
298+
def test_board_base_pin_meta_cache_touch(tmp_dir2, df):
    """Reading a pin's metadata should bump the access time of its cached file."""

    cache = fsspec.filesystem(
        "pinscache", target_protocol="file", same_names=True, hash_prefix=str(tmp_dir2),
    )
    board = BaseBoard(str(tmp_dir2), fs=cache)

    board.pin_write(df, "some_df", type="csv")
    meta = board.pin_meta("some_df")
    v = meta.version.version

    p_cache_version = board._get_cache_path(meta.name, v)
    p_cache_meta = Path(p_cache_version) / "data.txt"

    orig_access = p_cache_meta.stat().st_atime

    board.pin_meta("some_df")

    new_access = p_cache_meta.stat().st_atime

    # NOTE(review): the strict inequality assumes the two pin_meta calls are
    # far enough apart for st_atime to differ -- confirm on filesystems with
    # coarse timestamps.
    assert orig_access < new_access
320+
321+
291322
# RStudio Connect specific ====================================================
292323

293324
# import fixture that builds / tears down user "susan"

pins/tests/test_constructors.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,38 +100,62 @@ def board(backend):
100100

101101

102102
def test_constructor_board(board, df_csv, tmp_cache):
103-
prot = board.fs.protocol
103+
# TODO: would be nice to have fixtures for each board constructor
104+
# doesn't need to copy over pins-compat content
105+
106+
# create board from constructor -------------------------------------------
104107

108+
prot = board.fs.protocol
105109
fs_name = prot if isinstance(prot, str) else prot[0]
106110

107111
if fs_name == "file":
108-
con_name = "folder"
112+
board = c.board_folder(board.board)
109113
elif fs_name == "rsc":
110-
con_name = "rsconnect"
111-
pytest.xfail()
114+
board = c.board_rsconnect(
115+
server_url=board.fs.api.server_url, api_key=board.fs.api.api_key
116+
)
112117
else:
113-
con_name = fs_name
118+
board = getattr(c, f"board_{fs_name}")(board.board)
119+
120+
# read a pin and check its contents ---------------------------------------
114121

115-
board = getattr(c, f"board_{con_name}")(board.board)
116122
df = board.pin_read("df_csv")
117123

118124
# check data
119125
assert_frame_equal(df, df_csv)
120126

127+
# check the cache structure -----------------------------------------------
128+
121129
# check cache
122130
if fs_name == "file":
123131
# no caching for local file boards
124132
pass
125133
else:
134+
# check path structure ----
135+
126136
options = list(tmp_cache.glob("*"))
127137
assert len(options) == 1
128138

129139
cache_dir = options[0]
130-
res = list(cache_dir.rglob("**/*.csv"))
140+
res = list(cache_dir.rglob("*/*.csv"))
131141
assert len(res) == 1
132142

133143
check_cache_file_path(res[0], cache_dir)
134144

145+
# check cache touch on access time ----
146+
147+
meta = board.pin_meta("df_csv")
148+
p_cache_meta = (
149+
Path(board._get_cache_path(meta.name, meta.version.version)) / "data.txt"
150+
)
151+
orig_access = p_cache_meta.stat().st_atime
152+
153+
board.pin_meta("df_csv")
154+
155+
new_access = p_cache_meta.stat().st_atime
156+
157+
assert orig_access < new_access
158+
135159

136160
# Board particulars ===========================================================
137161

0 commit comments

Comments
 (0)