Skip to content

Commit 2d35fae

Browse files
committed
fix: correct cache paths
1 parent 5897d88 commit 2d35fae

File tree

4 files changed

+102
-22
lines changed

4 files changed

+102
-22
lines changed

pins/boards.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,13 @@ def info(self, path):
4545

4646
class BaseBoard:
4747
def __init__(
48-
self, board: str, fs: IFileSystem, versioned=True, meta_factory=MetaFactory(),
48+
self,
49+
board: "str | Path",
50+
fs: IFileSystem,
51+
versioned=True,
52+
meta_factory=MetaFactory(),
4953
):
50-
self.board = board
54+
self.board = str(board)
5155
self.fs = fs
5256
self.meta_factory = meta_factory
5357

pins/cache.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ def touch_access_time(path, access_time: "float | None" = None):
3636
return access_time
3737

3838

39+
def protocol_to_string(protocol):
    """Return a single protocol name for ``protocol``.

    A ``str`` is returned unchanged. Any other value is indexed and its
    first element is returned (e.g. the first entry of a tuple of names).
    """
    return protocol if isinstance(protocol, str) else protocol[0]
44+
45+
3946
class PinsCache(SimpleCacheFileSystem):
4047
protocol = "pinscache"
4148

@@ -74,7 +81,8 @@ def hash_name(self, path, same_name):
7481

7582
# TODO: hacky to automatically tack on protocol here
7683
# but this is what R pins boards do. Could make a bool arg?
77-
full_prefix = "_".join([self.fs.protocol, prefix])
84+
proto_name = protocol_to_string(self.fs.protocol)
85+
full_prefix = "_".join([proto_name, prefix])
7886
return str(full_prefix / suffix)
7987

8088
return path
@@ -109,7 +117,9 @@ def hash_name(self, path, same_name):
109117

110118
# note that we include an extra version folder, so it conforms with
111119
# pin board path form: <board_path>/<pin_name>/<version_name>/<file>
112-
return str(Path(prefix) / PLACEHOLDER_VERSION / final_part)
120+
proto_name = protocol_to_string(self.fs.protocol)
121+
full_prefix = "_".join([proto_name, prefix])
122+
return str(Path(full_prefix) / PLACEHOLDER_VERSION / final_part)
113123

114124

115125
class CachePruner:

pins/tests/conftest.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
from pathlib import Path
88
from pins.tests.helpers import BoardBuilder, RscBoardBuilder, Snapshot, rm_env
99

10+
EXAMPLE_REL_PATH = "pins/tests/pins-compat"
1011
PATH_TO_EXAMPLE_BOARD = files("pins") / "tests/pins-compat"
12+
PATH_TO_EXAMPLE_VERSION = PATH_TO_EXAMPLE_BOARD / "df_csv/20220214T163720Z-9bfad/"
13+
EXAMPLE_PIN_NAME = "df_csv"
1114

1215

1316
# Based on https://github.com/machow/siuba/blob/main/siuba/tests/helpers.py
@@ -66,19 +69,19 @@ def tmp_dir2():
6669

6770

6871
@pytest.fixture
69-
def tmp_cache(tmp_dir2):
72+
def tmp_cache():
7073
with rm_env("PINS_CACHE_DIR"):
71-
os.environ["PINS_CACHE_DIR"] = str(tmp_dir2)
72-
73-
yield tmp_dir2
74+
with tempfile.TemporaryDirectory() as tmp_dir:
75+
os.environ["PINS_CACHE_DIR"] = str(tmp_dir)
76+
yield Path(tmp_dir)
7477

7578

7679
@pytest.fixture
77-
def tmp_data_dir(tmp_dir2):
80+
def tmp_data_dir():
7881
with rm_env("PINS_DATA_DIR"):
79-
os.environ["PINS_DATA_DIR"] = str(tmp_dir2)
80-
81-
yield tmp_dir2
82+
with tempfile.TemporaryDirectory() as tmp_dir:
83+
os.environ["PINS_DATA_DIR"] = str(tmp_dir)
84+
yield Path(tmp_dir)
8285

8386

8487
def pytest_addoption(parser):

pins/tests/test_constructors.py

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,90 @@
11
import os
2+
import pandas as pd
23
import pytest
34

5+
from pandas.testing import assert_frame_equal
46
from pathlib import Path
57

68
from pins import constructors as c
7-
from pins.tests.conftest import PATH_TO_EXAMPLE_BOARD
9+
from pins.tests.conftest import (
10+
PATH_TO_EXAMPLE_BOARD,
11+
PATH_TO_EXAMPLE_VERSION,
12+
EXAMPLE_REL_PATH,
13+
)
814
from pins.tests.helpers import rm_env
915

1016

11-
# adapted from https://stackoverflow.com/a/34333710
17+
@pytest.fixture
def df_csv():
    """Load ``df_csv.csv`` from the example pin version directory.

    The first CSV column is used as the DataFrame index.
    """
    csv_path = PATH_TO_EXAMPLE_VERSION / "df_csv.csv"
    return pd.read_csv(csv_path, index_col=0)
1220

1321

1422
def check_dir_writable(p_dir):
    """Assert that the parent of ``p_dir`` exists and is writable.

    ``p_dir`` itself does not need to exist; only its parent directory is
    checked.
    """
    parent_dir = p_dir.parent
    assert parent_dir.exists()
    assert os.access(parent_dir.absolute(), os.W_OK)
1725

1826

27+
def check_cache_file_path(p_file, p_cache):
    """Assert that ``p_file`` has the form ``<pin>/<version>/<file>`` under ``p_cache``.

    Parameters
    ----------
    p_file:
        Path-like object (``pathlib`` path) of the cached file.
    p_cache:
        Path of the cache directory that ``p_file`` must live under.

    Raises
    ------
    AssertionError
        If the relative path does not have exactly three components.
    ValueError
        Raised by ``relative_to`` when ``p_file`` is not inside ``p_cache``.
    """
    # Count path components rather than "/" characters: the string form of a
    # path uses "\\" on Windows, so a separator count would always be 0 there.
    rel_parts = p_file.relative_to(p_cache).parts
    assert len(rel_parts) == 3
29+
30+
1931
# End-to-end constructor tests
2032

2133
# there are two facets of boards: reading and writing.
2234
# copied from test_compat
23-
def test_constructor_board_url(tmp_cache, http_example_board_path):
35+
def test_constructor_board_url_data(tmp_cache, http_example_board_path, df_csv):
    """Read ``df_csv`` through a ``board_urls`` board and compare with the local copy."""
    # could derive from example version path
    pin_paths = {"df_csv": "df_csv/20220214T163720Z-9bfad/"}
    board = c.board_urls(http_example_board_path, pin_paths=pin_paths)

    # check data ----
    assert_frame_equal(board.pin_read("df_csv"), df_csv)
46+
47+
48+
@pytest.mark.xfail
49+
def test_constructor_board_url_cache(tmp_cache, http_example_board_path, df_csv):
50+
# TODO: downloading a pin does not put files in the same directory, since
51+
# in this case we are hashing on the full url.
52+
2453
board = c.board_urls(
25-
http_example_board_path, pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/"}
54+
http_example_board_path,
55+
# could derive from example version path
56+
pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/"},
2657
)
2758

2859
board.pin_read("df_csv")
2960

30-
# check cache
31-
# check data
61+
# check cache ----
62+
http_dirs = list(tmp_cache.glob("http_*"))
63+
64+
assert len(http_dirs) == 1
3265

66+
parent = http_dirs[0]
67+
res = list(parent.rglob("**/*.csv"))
68+
assert len(res) == 1
3369

34-
def test_constructor_board_github(tmp_cache, http_example_board_path):
35-
board = c.board_github("machow", "pins-python", PATH_TO_EXAMPLE_BOARD) # noqa
70+
# has form: <pin>/<version>/<file>
71+
check_cache_file_path(res[0], parent)
72+
73+
74+
def test_constructor_board_github(tmp_cache, http_example_board_path, df_csv):
    """Read ``df_csv`` through a ``board_github`` board, then check data and cache layout."""
    board = c.board_github("machow", "pins-python", EXAMPLE_REL_PATH)  # noqa

    # check data ----
    assert_frame_equal(board.pin_read("df_csv"), df_csv)

    # check cache ----
    # exactly one "github_*" directory should exist in the temporary cache
    github_dirs = list(tmp_cache.glob("github_*"))
    assert len(github_dirs) == 1
    cache_dir = github_dirs[0]

    # ... holding exactly one csv file
    csv_files = list(cache_dir.rglob("**/*.csv"))
    assert len(csv_files) == 1

    # the file's path relative to the cache dir is checked by the shared helper
    check_cache_file_path(csv_files[0], cache_dir)
3688

3789

3890
@pytest.fixture(scope="session")
@@ -44,7 +96,7 @@ def board(backend):
4496
backend.teardown_board(board)
4597

4698

47-
def test_constructor_board(board):
99+
def test_constructor_board(board, df_csv, tmp_cache):
48100
prot = board.fs.protocol
49101

50102
fs_name = prot if isinstance(prot, str) else prot[0]
@@ -58,9 +110,20 @@ def test_constructor_board(board):
58110
con_name = fs_name
59111

60112
board = getattr(c, f"board_{con_name}")(board.board)
113+
df = board.pin_read("df_csv")
61114

62-
# check cache
63115
# check data
116+
assert_frame_equal(df, df_csv)
117+
118+
# check cache
119+
options = list(tmp_cache.glob("s3_*"))
120+
assert len(options) == 1
121+
122+
cache_dir = options[0]
123+
res = list(cache_dir.rglob("**/*.csv"))
124+
assert len(res) == 1
125+
126+
check_cache_file_path(res[0], cache_dir)
64127

65128

66129
# Board particulars ===========================================================

0 commit comments

Comments
 (0)