Skip to content

Commit fea4eae

Browse files
committed
use a cache for downloads
1 parent 6a29927 commit fea4eae

File tree

3 files changed

+126
-56
lines changed

3 files changed

+126
-56
lines changed

.circleci/config.yml

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,41 @@ version: 2.1
66
docker:
77
- image: pennlinc/babs_testing:0.2
88

9-
# TODO:
10-
# runinstall: &runinstall
11-
# name: Install BABS
12-
# TODO: build; build_docs; see QSIPrep
13-
# TODO: add tests from flake8
14-
159
jobs:
10+
download_test_data:
11+
<<: *dockersetup
12+
resource_class: medium
13+
working_directory: /home/circleci/src/babs
14+
steps:
15+
- checkout:
16+
path: /home/circleci/src/babs
17+
- restore_cache:
18+
keys:
19+
- test-data-v1
20+
- run:
21+
name: Download test datasets
22+
command: |
23+
mkdir -p /home/circleci/test_data
24+
cd /home/circleci/test_data
25+
# Install datalad-osf (pip leaves it untouched if the image already provides it)
26+
pip install datalad-osf
27+
# Download each dataset from origin_input_dataset.yaml
28+
python3 /home/circleci/src/babs/tests/download_datasets.py
29+
- save_cache:
30+
key: test-data-v1
31+
paths:
32+
- /home/circleci/test_data
33+
1634
pytest: # run pytest
1735
<<: *dockersetup
1836
resource_class: xlarge
1937
working_directory: /home/circleci/src/babs
2038
steps:
2139
- checkout:
2240
path: /home/circleci/src/babs
41+
- restore_cache:
42+
keys:
43+
- test-data-v1
2344
- run:
2445
name: pytest of BABS
2546
no_output_timeout: 1h
@@ -53,26 +74,28 @@ workflows:
5374
# version: 2
5475
build_test_deploy:
5576
jobs:
56-
# - build
57-
# - build_docs
77+
- download_test_data:
78+
filters:
79+
tags:
80+
only: /.*/
5881
- pytest:
59-
# requires:
60-
# - build
82+
requires:
83+
- download_test_data
6184
filters:
6285
tags:
63-
only: /.*/ # i.e., all branches and tags
86+
only: /.*/
6487
- deployable:
65-
requires: # should require all jobs' success before deploying
88+
requires:
6689
- pytest
67-
filters: # run when main branch + any tags
90+
filters:
6891
branches:
6992
only: main
7093
tags:
7194
only: /.*/
7295
- deploy_pypi:
7396
requires:
7497
- deployable
75-
filters: # runs for no branches but runs for any tags
98+
filters:
7699
branches:
77100
ignore: /.*/
78101
tags:

tests/download_datasets.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/usr/bin/env python3
2+
"""Script to download test datasets for BABS testing"""
3+
4+
import os
5+
import os.path as op
6+
7+
import datalad.api as dlapi
8+
import yaml
9+
10+
11+
def download_datasets():
12+
"""Download all test datasets specified in origin_input_dataset.yaml"""
13+
# Read the yaml file
14+
yaml_path = op.join(op.dirname(op.abspath(__file__)), 'origin_input_dataset.yaml')
15+
with open(yaml_path) as f:
16+
datasets = yaml.safe_load(f)
17+
18+
base_dir = '/home/circleci/test_data'
19+
os.makedirs(base_dir, exist_ok=True)
20+
21+
# Download each dataset
22+
for dataset_type, sessions in datasets.items():
23+
for session_type, url in sessions.items():
24+
target_dir = op.join(base_dir, f'{dataset_type}_{session_type}')
25+
print(f'Downloading {dataset_type} {session_type} from {url} to {target_dir}')
26+
dlapi.clone(source=url, path=target_dir)
27+
28+
29+
if __name__ == '__main__':
30+
download_datasets()

tests/get_data.py

Lines changed: 59 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""This is to get data for pytests"""
2+
23
import os
34
import os.path as op
45

@@ -11,32 +12,36 @@
1112
import pytest
1213

1314
sys.path.append('..')
14-
from babs.utils import (read_yaml) # noqa
15+
from babs.utils import read_yaml # noqa
1516

1617
# =============== Define several constant variables: ==================
17-
__location__ = op.dirname(op.abspath(__file__)) # the path to the directory of current python script
18+
__location__ = op.dirname(
19+
op.abspath(__file__)
20+
) # the path to the directory of current python script
1821
# ^^ `op.abspath()` makes sure an absolute path is always returned, regardless of the Python version
1922
# ref: https://note.nkmk.me/en/python-script-file-path/
2023

2124
# containers:
2225
LIST_WHICH_BIDSAPP = ['toybidsapp', 'fmriprep', 'qsiprep']
23-
TOYBIDSAPP_VERSION = '0.0.7' # +++++++++++++++++++++++
26+
TOYBIDSAPP_VERSION = '0.0.7' # +++++++++++++++++++++++
2427
TOYBIDSAPP_VERSION_DASH = TOYBIDSAPP_VERSION.replace('.', '-')
25-
FN_TOYBIDSAPP_SIF_CIRCLECI = op.join('/singularity_images',
26-
'toybidsapp_' + TOYBIDSAPP_VERSION + '.sif')
28+
FN_TOYBIDSAPP_SIF_CIRCLECI = op.join(
29+
'/singularity_images', 'toybidsapp_' + TOYBIDSAPP_VERSION + '.sif'
30+
)
2731

2832
# path of input datasets:
2933
ORIGIN_INPUT_DATA = read_yaml(op.join(__location__, 'origin_input_dataset.yaml'))
3034
INFO_2ND_INPUT_DATA = {
3135
'which_input': 'fmriprep',
3236
# "type_session": this should be consistent with the first dataset
33-
'if_input_local': False
37+
'if_input_local': False,
3438
}
3539

3640
# env variables
3741
TEMPLATEFLOW_HOME = '/test/templateflow_home'
3842
# ====================================================================
3943

44+
4045
def get_input_data(which_input, type_session, if_input_local, tmp_path_factory):
4146
"""
4247
This is to get the path of input data.
@@ -51,14 +56,25 @@ def get_input_data(which_input, type_session, if_input_local, tmp_path_factory):
5156
if_input_local: bool
5257
if the input dataset is local [True] or remote (e.g., on OSF) [False]
5358
tmp_path_factory: fixture
54-
see: https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html#the-tmp-path-factory-fixture
55-
API see: https://docs.pytest.org/en/7.1.x/reference/reference.html#tmp-path-factory
59+
see: https://docs.pytest.org/en/7.1.x/how-to/tmp_path.html#the-tmp-path-factory-fixture
5660
5761
Returns:
5862
-----------
5963
path_in: str
6064
where is the input dataset
6165
"""
66+
# Check if we're on CircleCI and should use cached data
67+
env_circleci = os.getenv('CIRCLECI')
68+
if env_circleci and not if_input_local:
69+
cached_path = op.join('/home/circleci/test_data', f'{which_input}_{type_session}')
70+
if op.exists(cached_path):
71+
return cached_path
72+
else:
73+
raise Exception(
74+
f'Expected cached dataset not found at {cached_path}. '
75+
'Did the download_test_data job complete successfully?'
76+
)
77+
6278
if not if_input_local:
6379
# directly grab from pre-defined YAML file:
6480
path_in = ORIGIN_INPUT_DATA[which_input][type_session]
@@ -68,29 +84,25 @@ def get_input_data(which_input, type_session, if_input_local, tmp_path_factory):
6884
path_in_pathlib = tmp_path_factory.mktemp(which_input)
6985
# convert to an absolute-path string (though this seems unnecessary):
7086
path_in = path_in_pathlib.absolute().as_posix()
71-
# ^^ e.g.: `/private/var/folders/fn/????/T/pytest-of-<username>/pytest-00/<which_input>0`
72-
# `????` is random number but consistent across pytests;
73-
# `pytest-00` is index of pytest, 01, 02, etc. Only most recent 3 temp dir will be kept.
74-
# `<which_input>0`: e.g., `bids0`, `bids1`, etc.
75-
# --> Even if same `which_input`, the temp dir won't be duplicated. Tested.
7687

7788
# clone to this local temporary place:
78-
dlapi.clone(source=origin_in,
79-
path=path_in)
89+
dlapi.clone(source=origin_in, path=path_in)
8090

8191
return path_in
8292

93+
8394
@pytest.fixture(scope='session')
8495
def if_circleci():
85-
""" If it's currently running on CircleCI """
86-
env_circleci = os.getenv('CIRCLECI') # a string 'true' or None
96+
"""If it's currently running on CircleCI"""
97+
env_circleci = os.getenv('CIRCLECI') # a string 'true' or None
8798
if env_circleci:
8899
if_circleci = True
89100
else:
90101
if_circleci = False
91102

92103
return if_circleci
93104

105+
94106
@pytest.fixture(scope='session')
95107
def where_now(if_circleci):
96108
"""
@@ -113,10 +125,13 @@ def where_now(if_circleci):
113125
where_now = 'on_local'
114126
else:
115127
where_now = ''
116-
raise Exception('Not on CircleCI, and neither singularity nor docker is installed!'
117-
+ ' Pytest cannot proceed.')
128+
raise Exception(
129+
'Not on CircleCI, and neither singularity nor docker is installed!'
130+
' Pytest cannot proceed.'
131+
)
118132
return where_now
119133

134+
120135
@pytest.fixture(scope='session')
121136
def container_ds_path(where_now, tmp_path_factory):
122137
"""
@@ -152,19 +167,20 @@ def container_ds_path(where_now, tmp_path_factory):
152167
elif where_now == 'on_cluster':
153168
# build the sif file:
154169
folder_toybidsapp_sif = tmp_path_factory.mktemp('temp_singularity_images')
155-
fn_toybidsapp_sif = op.join(folder_toybidsapp_sif,
156-
'toybidsapp_' + TOYBIDSAPP_VERSION + '.sif')
170+
fn_toybidsapp_sif = op.join(
171+
folder_toybidsapp_sif, 'toybidsapp_' + TOYBIDSAPP_VERSION + '.sif'
172+
)
157173
proc_singularity_build = subprocess.run(
158174
# singularity build toybidsapp_${toybidsapp_version}.sif \
159175
# docker://pennlinc/toy_bids_app:${toybidsapp_version}
160176
['singularity', 'build', fn_toybidsapp_sif, docker_url],
161-
stdout=subprocess.PIPE)
177+
stdout=subprocess.PIPE,
178+
)
162179
proc_singularity_build.check_returncode()
163180
elif where_now == 'on_local':
164181
# directly pull from docker:
165182
cmd = 'docker pull ' + docker_addr
166-
proc_docker_pull = subprocess.run(
167-
cmd.split())
183+
proc_docker_pull = subprocess.run(cmd.split())
168184
proc_docker_pull.check_returncode()
169185
fn_toybidsapp_sif = None
170186
else:
@@ -179,27 +195,27 @@ def container_ds_path(where_now, tmp_path_factory):
179195
container_ds_handle = dlapi.create(path=origin_container_ds)
180196
# add container image into this datalad dataset:
181197
for which_bidsapp in LIST_WHICH_BIDSAPP:
182-
if where_now in ['on_circleci', 'on_cluster']: # add the built sif file:
198+
if where_now in ['on_circleci', 'on_cluster']: # add the built sif file:
183199
# datalad containers-add --url ${fn_sif} toybidsapp-${version_tag_dash}
184200
# API help: in python env: `help(dlapi.containers_add)`
185201
container_ds_handle.containers_add(
186-
name=which_bidsapp+'-'+TOYBIDSAPP_VERSION_DASH, # e.g., "toybidsapp-0-0-7"
187-
url=fn_toybidsapp_sif)
202+
name=which_bidsapp + '-' + TOYBIDSAPP_VERSION_DASH, # e.g., "toybidsapp-0-0-7"
203+
url=fn_toybidsapp_sif,
204+
)
188205
# # can remove the original sif file now:
189206
# os.remove(fn_toybidsapp_sif)
190-
elif where_now == 'on_local': # add docker image:
207+
elif where_now == 'on_local': # add docker image:
191208
# datalad containers-add --url dhub://pennlinc/toy_bids_app:${version_tag} \
192209
# toybidsapp-${version_tag_dash}
193210
container_ds_handle.containers_add(
194-
name=which_bidsapp+'-'+TOYBIDSAPP_VERSION_DASH, # e.g., "toybidsapp-0-0-7"
195-
url='dhub://'+docker_addr # e.g., "dhub://pennlinc/toy_bids_app:0.0.7"
211+
name=which_bidsapp + '-' + TOYBIDSAPP_VERSION_DASH, # e.g., "toybidsapp-0-0-7"
212+
url='dhub://' + docker_addr, # e.g., "dhub://pennlinc/toy_bids_app:0.0.7"
196213
)
197214

198215
return origin_container_ds
199216

200-
def get_container_config_yaml_filename(which_bidsapp,
201-
which_input, if_two_input,
202-
type_system):
217+
218+
def get_container_config_yaml_filename(which_bidsapp, which_input, if_two_input, type_system):
203219
"""
204220
This is to get the container's config YAML file name,
205221
depending on the BIDS App and if there are two inputs (for fMRIPrep)
@@ -223,16 +239,17 @@ def get_container_config_yaml_filename(which_bidsapp,
223239
"""
224240
# dict_cluster_name = {'sge': 'cubic',
225241
# 'slurm': 'msi'}
226-
dict_bidsapp_version = {'qsiprep': '0-18-1',
227-
'fmriprep': '23-1-3',
228-
'toybidsapp': '0-0-7'}
229-
dict_task_name = {'qsiprep': 'regular',
230-
'fmriprep': 'regular',
231-
'toybidsapp': 'rawBIDS-walkthrough'}
242+
dict_bidsapp_version = {'qsiprep': '0-18-1', 'fmriprep': '23-1-3', 'toybidsapp': '0-0-7'}
243+
dict_task_name = {
244+
'qsiprep': 'regular',
245+
'fmriprep': 'regular',
246+
'toybidsapp': 'rawBIDS-walkthrough',
247+
}
232248

233249
# bidsapp and its version:
234-
container_config_yaml_filename = 'eg_' + which_bidsapp + '-' \
235-
+ dict_bidsapp_version[which_bidsapp]
250+
container_config_yaml_filename = (
251+
'eg_' + which_bidsapp + '-' + dict_bidsapp_version[which_bidsapp]
252+
)
236253

237254
# task:
238255
container_config_yaml_filename += '_'
@@ -265,7 +282,7 @@ def if_command_installed(cmd):
265282
True or False
266283
"""
267284
a = shutil.which(cmd)
268-
if a is None: # not exist:
285+
if a is None: # not exist:
269286
if_installed = False
270287
else:
271288
if_installed = True

0 commit comments

Comments
 (0)