Commit e06524f

Merge branch 'main' into add/job-subtypes-remote
2 parents eb7ecdd + fe1f012 commit e06524f

File tree

4 files changed: +1285 -0 lines changed

tests/test_dataset.py

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
import os
import importlib


def _fresh(monkeypatch):
    for mod in ["lab.dataset", "lab.dirs"]:
        if mod in importlib.sys.modules:
            importlib.sys.modules.pop(mod)


def test_dataset_get_dir(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    ds = Dataset("test-dataset")
    d = ds.get_dir()
    assert d.endswith(os.path.join("datasets", "test-dataset"))


def test_dataset_create_and_get(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    # Create dataset and verify it exists
    ds = Dataset.create("test_dataset")
    assert ds is not None
    assert os.path.isdir(ds.get_dir())
    index_file = os.path.join(ds.get_dir(), "index.json")
    assert os.path.isfile(index_file)

    # Get the dataset and verify its properties
    ds2 = Dataset.get("test_dataset")
    assert isinstance(ds2, Dataset)
    data = ds2.get_json_data()
    assert data["dataset_id"] == "test_dataset"
    assert data["location"] == "local"


def test_dataset_default_json(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    ds = Dataset.create("test_dataset_default")
    data = ds.get_json_data()
    assert data["dataset_id"] == "test_dataset_default"
    assert data["location"] == "local"
    assert data["description"] == ""
    assert data["size"] == -1
    assert data["json_data"] == {}


def test_dataset_set_metadata(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    ds = Dataset.create("test_dataset_metadata")

    # Test setting individual metadata fields
    ds.set_metadata(location="remote", description="Test dataset", size=1000)
    data = ds.get_json_data()
    assert data["location"] == "remote"
    assert data["description"] == "Test dataset"
    assert data["size"] == 1000

    # Test setting json_data
    ds.set_metadata(json_data={"key1": "value1", "key2": "value2"})
    data = ds.get_json_data()
    assert data["json_data"]["key1"] == "value1"
    assert data["json_data"]["key2"] == "value2"

    # Test merging json_data (shallow merge)
    ds.set_metadata(json_data={"key2": "updated", "key3": "value3"})
    data = ds.get_json_data()
    assert data["json_data"]["key1"] == "value1"  # Preserved
    assert data["json_data"]["key2"] == "updated"  # Updated
    assert data["json_data"]["key3"] == "value3"  # New key


def test_dataset_get_metadata(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    ds = Dataset.create("test_dataset_get")
    ds.set_metadata(description="My dataset", size=500)
    metadata = ds.get_metadata()
    assert metadata["dataset_id"] == "test_dataset_get"
    assert metadata["description"] == "My dataset"
    assert metadata["size"] == 500


def test_dataset_list_all(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    # Create multiple datasets
    ds1 = Dataset.create("dataset1")
    ds1.set_metadata(description="First dataset")
    ds2 = Dataset.create("dataset2")
    ds2.set_metadata(description="Second dataset")

    # List all datasets
    all_datasets = Dataset.list_all()
    assert isinstance(all_datasets, list)
    assert len(all_datasets) >= 2

    # Verify datasets are in the list
    dataset_ids = [d["dataset_id"] for d in all_datasets]
    assert "dataset1" in dataset_ids
    assert "dataset2" in dataset_ids


def test_dataset_list_all_empty_dir(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    # List all datasets when none exist
    all_datasets = Dataset.list_all()
    assert isinstance(all_datasets, list)
    assert len(all_datasets) == 0


def test_dataset_secure_filename(tmp_path, monkeypatch):
    _fresh(monkeypatch)
    home = tmp_path / ".tfl_home"
    ws = tmp_path / ".tfl_ws"
    home.mkdir()
    ws.mkdir()
    monkeypatch.setenv("TFL_HOME_DIR", str(home))
    monkeypatch.setenv("TFL_WORKSPACE_DIR", str(ws))

    from lab.dataset import Dataset

    # Test that secure_filename sanitizes the dataset ID
    # secure_filename replaces path separators ("/") with "_"
    ds = Dataset.create("test/../dataset")
    # The directory should be sanitized
    dir_path = ds.get_dir()
    # Should not contain actual path traversal (../ as a path component)
    # secure_filename converts "test/../dataset" to "test_.._dataset"
    # which is safe because ".." is part of the filename, not a path separator
    assert os.path.sep + ".." + os.path.sep not in dir_path
    assert dir_path.endswith("test_.._dataset") or "test_.._dataset" in dir_path
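
Taken together, these tests pin down the surface of the Dataset class: construction from an ID, a get_dir() under <workspace>/datasets/, create()/get() backed by an index.json with defaults (location="local", description="", size=-1, json_data={}), a shallow-merging set_metadata(), get_metadata(), and list_all() returning metadata dicts. The sketch below is inferred from those assertions only; it is not the actual lab/dataset.py, and the workspace lookup, file helpers, and omission of secure_filename are simplifications for illustration.

    # Minimal sketch of a Dataset class consistent with the assertions above.
    # Inferred from the tests only -- not the real lab/dataset.py. The tests
    # also expect the real class to sanitize IDs with secure_filename; that
    # step is omitted here.
    import json
    import os


    def _datasets_root() -> str:
        # The tests point TFL_WORKSPACE_DIR at a temp directory before importing.
        ws = os.environ.get("TFL_WORKSPACE_DIR", os.path.expanduser("~/.tfl_ws"))
        return os.path.join(ws, "datasets")


    class Dataset:
        def __init__(self, dataset_id: str):
            self.dataset_id = dataset_id

        def get_dir(self) -> str:
            return os.path.join(_datasets_root(), self.dataset_id)

        def _index_path(self) -> str:
            return os.path.join(self.get_dir(), "index.json")

        @classmethod
        def create(cls, dataset_id: str) -> "Dataset":
            ds = cls(dataset_id)
            os.makedirs(ds.get_dir(), exist_ok=True)
            ds._write(
                {
                    "dataset_id": ds.dataset_id,
                    "location": "local",
                    "description": "",
                    "size": -1,
                    "json_data": {},
                }
            )
            return ds

        @classmethod
        def get(cls, dataset_id: str) -> "Dataset":
            return cls(dataset_id)

        @classmethod
        def list_all(cls) -> list:
            root = _datasets_root()
            if not os.path.isdir(root):
                return []
            return [cls(name).get_json_data() for name in sorted(os.listdir(root))]

        def set_metadata(self, **fields) -> None:
            data = self.get_json_data()
            extra = fields.pop("json_data", None)
            data.update(fields)  # scalar fields are overwritten
            if extra:
                data["json_data"].update(extra)  # json_data merges shallowly
            self._write(data)

        def get_metadata(self) -> dict:
            return self.get_json_data()

        def get_json_data(self) -> dict:
            with open(self._index_path()) as f:
                return json.load(f)

        def _write(self, data: dict) -> None:
            with open(self._index_path(), "w") as f:
                json.dump(data, f, indent=2)

Pointing TFL_WORKSPACE_DIR at an empty directory and running Dataset.create("demo") followed by Dataset.list_all() reproduces the behaviour the tests assert.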

tests/test_init_exports.py

Lines changed: 4 additions & 0 deletions
@@ -6,4 +6,8 @@ def test_exports():
     assert hasattr(lab, "Job")
     assert hasattr(lab, "Experiment")
     assert hasattr(lab, "Model")
+    assert hasattr(lab, "Dataset")
+    assert hasattr(lab, "Task")
+    assert hasattr(lab, "Lab")
+    assert hasattr(lab, "lab")
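
For consumers, the added assertions guarantee that Dataset, Task, Lab, and lab are importable directly from the lab package, alongside the existing Job, Experiment, and Model. A small usage sketch follows; only the exported names are confirmed by this diff, and how they are wired up inside lab/__init__.py is not shown here.

    # The names below are exactly those asserted in test_exports(); anything
    # beyond "importable from lab" is not covered by this diff.
    from lab import Dataset, Experiment, Job, Lab, Model, Task, lab

    ds = Dataset("example")  # per tests/test_dataset.py, takes a dataset ID
    print(ds.get_dir())      # .../datasets/example under TFL_WORKSPACE_DIR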

0 commit comments
