Skip to content

Commit 1ded3d9

Browse files
authored
Merge pull request #78 from machow/feat-cache-touch
Feat cache touch
2 parents 9ccfafe + 133479f commit 1ded3d9

File tree

9 files changed

+166
-34
lines changed

9 files changed

+166
-34
lines changed

pins/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@
1111
# Imports ----
1212
from .boards import BaseBoard
1313
from .constructors import *
14+
from .cache import PinsCache, PinsUrlCache, cache_prune

pins/boards.py

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import tempfile
23
import shutil
34
import inspect
@@ -148,9 +149,14 @@ def pin_meta(self, name, version: str = None) -> Meta:
148149
components = [pin_name, selected_version.version]
149150
meta_name = self.meta_factory.get_meta_name(*components)
150151

151-
path_version = self.construct_path([*components, meta_name])
152-
f = self.fs.open(path_version)
153-
return self.meta_factory.read_pin_yaml(f, pin_name, selected_version)
152+
path_meta = self.construct_path([*components, meta_name])
153+
f = self.fs.open(path_meta)
154+
155+
meta = self.meta_factory.read_pin_yaml(f, pin_name, selected_version)
156+
157+
self._touch_cache(path_meta)
158+
159+
return meta
154160

155161
def pin_list(self):
156162
"""List names of all pins in a board.
@@ -385,9 +391,9 @@ def pin_versions_prune(
385391
# TODO(question): how to pin_inform? Log or warning?
386392
if to_delete:
387393
str_vers = ", ".join([v.version for v in to_delete])
388-
print(f"Deleting versions: {str_vers}.")
394+
logging.info(f"Deleting versions: {str_vers}.")
389395
if not to_delete:
390-
print("No old versions to delete")
396+
logging.info("No old versions to delete")
391397

392398
for version in to_delete:
393399
self.pin_version_delete(name, version.version)
@@ -561,6 +567,23 @@ def _extract_search_meta(self, meta):
561567
d["meta"] = meta
562568
return d
563569

570+
def _get_cache_path(self, pin_name, version):
571+
p_version = self.construct_path([self.path_to_pin(pin_name), version])
572+
hash = self.fs.hash_name(p_version, True)
573+
return str(Path(self.fs.storage[-1]) / hash)
574+
575+
def _touch_cache(self, path):
576+
from pins.cache import touch_access_time
577+
578+
# TODO: assumes same_name set to True. Let's require this be set to
579+
# instantiate a pins cache.
580+
if not hasattr(self.fs, "cached_files"):
581+
return
582+
583+
hash = self.fs.hash_name(path, True)
584+
path_to_hashed = Path(self.fs.storage[-1]) / hash
585+
return touch_access_time(path_to_hashed)
586+
564587

565588
class BoardManual(BaseBoard):
566589
"""Simple board that accepts a dictionary of form pin_name: path.
@@ -614,7 +637,9 @@ def pin_meta(self, name, version=None):
614637

615638
path_meta = self.construct_path([pin_name, meta_name])
616639
f = self.fs.open(path_meta)
617-
return self.meta_factory.read_pin_yaml(f, pin_name, VersionRaw(""))
640+
meta = self.meta_factory.read_pin_yaml(f, pin_name, VersionRaw(""))
641+
642+
return meta
618643

619644
def pin_download(self, name, version=None, hash=None) -> Sequence[str]:
620645
meta = self.pin_meta(name, version)
@@ -692,12 +717,13 @@ def pin_search(self, search=None, as_df=True):
692717

693718
paged_res = self.fs.api.misc_get_applications("content_type:pin", search=search)
694719
results = paged_res.results
695-
names = [f"{cont['owner_username']}/{cont['name']}" for cont in results]
696720

697721
res = []
698-
for pin_name in names:
722+
for content in results:
723+
pin_name = f"{content['owner_username']}/{content['name']}"
724+
version = str(content["bundle_id"])
699725
try:
700-
meta = self.pin_meta(pin_name)
726+
meta = self.pin_meta(pin_name, version)
701727
res.append(meta)
702728

703729
except RsConnectApiRequestError as e:

pins/cache.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import humanize
2+
import logging
23
import os
34
import time
45
import shutil
@@ -16,7 +17,7 @@
1617
PLACEHOLDER_FILE = "file"
1718

1819

19-
def touch_access_time(path, access_time: "float | None" = None):
20+
def touch_access_time(path, access_time: "float | None" = None, strict=True):
2021
"""Update access time of file.
2122
2223
Returns the new access time.
@@ -27,7 +28,7 @@ def touch_access_time(path, access_time: "float | None" = None):
2728

2829
p = Path(path)
2930

30-
if not p.exists():
31+
if not p.exists() and not strict:
3132
p.touch()
3233

3334
stat = p.stat()
@@ -64,6 +65,7 @@ def _make_local_details(self, path):
6465
# note that this is called in ._open(), at the point it's known the file
6566
# will be cached
6667
fn = super()._make_local_details(path)
68+
logging.info(f"cache file: {fn}")
6769
Path(fn).parent.mkdir(parents=True, exist_ok=True)
6870

6971
return fn
@@ -72,7 +74,7 @@ def hash_name(self, path, same_name):
7274
# the main change in this function is that, for same_name, it returns
7375
# the full path
7476
if same_name:
75-
if self.hash_prefix:
77+
if self.hash_prefix is not None:
7678
# optionally make the name relative to a parent path
7779
# using the hash of parent path as a prefix, to flatten a bit
7880
suffix = Path(path).relative_to(Path(self.hash_prefix))
@@ -87,10 +89,37 @@ def hash_name(self, path, same_name):
8789

8890
return path
8991
else:
90-
return hash_name(path, same_name)
92+
raise NotImplementedError()
9193

92-
def touch_access_time(path):
93-
return touch_access_time(path)
94+
95+
class PinsRscCache(PinsCache):
96+
"""Modifies the PinsCache to allow hash_prefix to be an RSC server url.
97+
98+
Note that this class also modifies the first / in a path to be a +, so that
99+
pin contents will not be put into subdirectories, for e.g. michael/mtcars/data.txt.
100+
"""
101+
102+
protocol = "pinsrsccache"
103+
104+
def hash_name(self, path, same_name):
105+
# the main change in this function is that, for same_name, it returns
106+
# the full path
107+
if same_name:
108+
if self.hash_prefix is None:
109+
raise NotImplementedError()
110+
111+
# change pin path of form <user>/<content> to <user>+<content>
112+
suffix = path.replace("/", "+", 1)
113+
prefix = hash_name(self.hash_prefix, False)
114+
115+
# TODO: hacky to automatically tack on protocol here
116+
# but this is what R pins boards do. Could make a bool arg?
117+
proto_name = protocol_to_string(self.fs.protocol)
118+
full_prefix = "_".join([proto_name, prefix])
119+
return str(full_prefix / Path(suffix))
120+
121+
else:
122+
raise NotImplementedError()
94123

95124

96125
class PinsUrlCache(PinsCache):
@@ -171,7 +200,7 @@ def prune(self, days=30):
171200
for path in to_prune:
172201
delete_version(to_prune)
173202

174-
print("Skipping cache deletion")
203+
logging.info("Skipping cache deletion")
175204

176205

177206
def delete_version(path: "str | Path"):
@@ -184,7 +213,7 @@ def disk_usage(path):
184213

185214

186215
def prompt_cache_prune(to_prune, size) -> bool:
187-
print(to_prune)
216+
logging.info(f"Pruning items: {to_prune}")
188217
human_size = humanize.naturalsize(size, binary=True)
189218
resp = input(f"Delete {len(to_prune)} pin versions, freeing {human_size}?")
190219
return resp == "yes"

pins/config.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,12 @@ def get_allow_pickle_read(flag):
2929
flag = bool(env_int)
3030

3131
return flag
32+
33+
34+
def _enable_logs():
35+
import logging
36+
37+
format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
38+
handlers = [logging.FileHandler("filename.log"), logging.StreamHandler()]
39+
40+
logging.basicConfig(level=logging.INFO, format=format, handlers=handlers)

pins/constructors.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import tempfile
44

55
from .boards import BaseBoard, BoardRsConnect, BoardManual
6-
from .cache import PinsCache, PinsUrlCache
6+
from .cache import PinsCache, PinsUrlCache, PinsRscCache
77
from .config import get_data_dir, get_cache_dir
88

99

@@ -58,6 +58,8 @@ def board(
5858
if storage_options is None:
5959
storage_options = {}
6060

61+
# TODO: at this point should just manually construct the rsc board directly
62+
# from board_rsconnect...
6163
if protocol == "rsc":
6264
# TODO: register RsConnectFs with fsspec
6365
from pins.rsconnect.fs import RsConnectFs
@@ -71,9 +73,17 @@ def board(
7173

7274
if cache is DEFAULT:
7375
cache_dir = get_cache_dir()
74-
fs = PinsCache(
75-
cache_storage=cache_dir, fs=fs, hash_prefix=path, same_names=True
76-
)
76+
77+
# manually create a subdirectory for rsc server
78+
if protocol == "rsc":
79+
hash_prefix = storage_options["server_url"]
80+
fs = PinsRscCache(
81+
cache_storage=cache_dir, fs=fs, hash_prefix=hash_prefix, same_names=True
82+
)
83+
else:
84+
fs = PinsCache(
85+
cache_storage=cache_dir, fs=fs, hash_prefix=path, same_names=True
86+
)
7787
elif cache is None:
7888
pass
7989
else:
@@ -272,7 +282,9 @@ def board_rsconnect(
272282
server_url = os.environ.get("CONNECT_SERVER")
273283

274284
kwargs = dict(server_url=server_url, api_key=api_key)
275-
return board("rsc", "", versioned, cache, allow_pickle_read, storage_options=kwargs)
285+
return board(
286+
"rsc", None, versioned, cache, allow_pickle_read, storage_options=kwargs
287+
)
276288

277289

278290
def board_s3(path, versioned=True, cache=DEFAULT, allow_pickle_read=None):

pins/meta.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ def to_pin_yaml(self, f: Optional[IOBase] = None) -> "str | None":
112112
class MetaV0:
113113
file: Union[str, Sequence[str]]
114114
type: str
115+
description: "str | None"
115116

116-
description: str
117117
name: str
118118

119119
version: VersionRaw
@@ -134,9 +134,10 @@ def to_dict(self):
134134
@classmethod
135135
def from_pin_dict(cls, data, pin_name, version) -> "MetaV0":
136136
# could infer from dataclasses.fields(), but seems excessive.
137-
req_fields = {"type", "description", "name"}
137+
req_fields = {"type", "description"}
138138

139-
req_inputs = {k: v for k, v in data.items() if k in req_fields}
139+
# Note that we need to .get(), since fields may not be in metadata
140+
req_inputs = {k: data.get(k) for k in req_fields}
140141
req_inputs["file"] = data["path"]
141142

142143
return cls(**req_inputs, name=pin_name, original_fields=data, version=version)

pins/rsconnect/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23
import requests
34
import tempfile
@@ -221,6 +222,7 @@ def _raw_query(self, url, method="GET", return_request=False, **kwargs):
221222

222223
headers = self._get_headers()
223224

225+
logging.info(f"RSConnect API {method}: {url} -- {kwargs}")
224226
r = self.session.request(method, url, headers=headers, **kwargs)
225227

226228
if return_request:
@@ -246,13 +248,11 @@ def walk_paginated_offsets(self, f_query, endpoint, method, params=None, **kwarg
246248
all_results.extend(data["results"])
247249

248250
while data["results"]:
249-
print("FETCHING")
250251
page_kwargs = {"page_number": data["current_page"] + 1}
251252
new_params = {**params, **page_kwargs}
252253
data = f_query(endpoint, method, params=new_params)
253254

254255
all_results.extend(data["results"])
255-
print(data["results"])
256256

257257
return all_results
258258

pins/tests/test_boards.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
from datetime import datetime, timedelta
1313
from time import sleep
14+
from pathlib import Path
1415

1516
# using pytest cases, so that we can pass in fixtures as parameters
1617
# TODO: this seems like maybe overkill
@@ -288,6 +289,35 @@ def test_board_pin_search_name(board, df, search, matches):
288289
assert sorted_meta_names == sorted(matches)
289290

290291

292+
# BaseBoard specific ==========================================================
293+
294+
from pins.boards import BaseBoard # noqa
295+
from pins.cache import PinsCache # noqa
296+
297+
298+
def test_board_base_pin_meta_cache_touch(tmp_dir2, df):
299+
300+
cache = fsspec.filesystem(
301+
"pinscache", target_protocol="file", same_names=True, hash_prefix=str(tmp_dir2),
302+
)
303+
board = BaseBoard(str(tmp_dir2), fs=cache)
304+
305+
board.pin_write(df, "some_df", type="csv")
306+
meta = board.pin_meta("some_df")
307+
v = meta.version.version
308+
309+
p_cache_version = board._get_cache_path(meta.name, v)
310+
p_cache_meta = Path(p_cache_version) / "data.txt"
311+
312+
orig_access = p_cache_meta.stat().st_atime
313+
314+
board.pin_meta("some_df")
315+
316+
new_access = p_cache_meta.stat().st_atime
317+
318+
assert orig_access < new_access
319+
320+
291321
# RStudio Connect specific ====================================================
292322

293323
# import fixture that builds / tears down user "susan"

0 commit comments

Comments
 (0)