@@ -128,7 +128,7 @@ def create_readme_content(pet_datasets, readme_template):
     return readme_template.format(dataset_list=dataset_list)


-pet_datasets = {
+DEFAULT_PET_DATASETS = {
     'ds005619': {
         'version': '1.1.0',
         'description': '[18F]SF51, a Novel 18F-labeled PET Radioligand for '
@@ -150,22 +150,26 @@ def create_readme_content(pet_datasets, readme_template):
     },
 }

-openneuro_template_string = 'https://github.com/OpenNeuroDatasets/{DATASET_ID}.git'
+OPENNEURO_TEMPLATE_STRING = 'https://github.com/OpenNeuroDatasets/{DATASET_ID}.git'


 def download_test_data(
-    working_directory: TemporaryDirectory | None = None,
-    output_directory: Path | str = '',
-    pet_datasets_json=None,  # Default to None, not the dict
+    working_directory: Path | None = None,
+    output_directory: Path | None = None,
+    pet_datasets_json: dict = None,  # Default to None, not the dict
+    derivatives: list[str] | None = None,
 ):
     # Use default datasets if no JSON file provided
     if pet_datasets_json is None:
-        datasets_to_use = pet_datasets  # Use the default defined at module level
+        datasets_to_use = DEFAULT_PET_DATASETS  # Use the default defined at module level
     else:
         # Load from JSON file
         with open(pet_datasets_json) as infile:
             datasets_to_use = json.load(infile)

+    if derivatives is None:
+        derivatives = []
+
     if not working_directory:
         working_directory = TemporaryDirectory()

@@ -184,18 +188,25 @@ def download_test_data(
         dataset_path.rmdir()
         dataset = api.install(
             path=dataset_path,
-            source=openneuro_template_string.format(DATASET_ID=dataset_id),
+            source=OPENNEURO_TEMPLATE_STRING.format(DATASET_ID=dataset_id),
         )
         # api.unlock(str(dataset_path))
         dataset.unlock()

         # see how pybids handles this datalad nonsense
         b = bids.layout.BIDSLayout(
-            dataset_path, derivatives=False
+            dataset_path,
+            derivatives=False,
+            validate=False,
         )  # when petderivatives are a thing, we'll think about using pybids to get them

         # Access participants.tsv
-        participants_files = b.get(suffix='participants', extension='.tsv', return_type='file')
+        participants_files = b.get(
+            suffix='participants',
+            extension='.tsv',
+            return_type='file',
+            scope='raw',
+        )
         if participants_files:
             participants_file = participants_files[0]

@@ -207,33 +218,47 @@ def download_test_data(
                     [combined_participants_tsv, participants_df], ignore_index=True
                 )
             # if a subset of subjects are specified collect only those subjects in the install
-            if meta.get('subject_ids', []) != []:
-                for _id in meta['subject_ids']:
-                    combined_subjects.append(_id)
+            if meta.get('subject_ids', []):
+                for sid in meta['subject_ids']:
+                    combined_subjects.append(sid)
                     # Get the entire subject directory content including git-annex files
-                    subject_dir = dataset_path / f'sub-{_id}'
-                    if subject_dir.exists():
-                        # First, get all content in the subject directory
-                        # (this retrieves git-annex files)
-                        dataset.get(str(subject_dir))
-
-                        # Then collect all files after they've been retrieved
-                        all_files = []
-                        for file_path in subject_dir.rglob('*'):
-                            if file_path.is_file():
-                                relative_path = file_path.relative_to(dataset_path)
+                    subject_dir = dataset_path / f'sub-{sid}'
+                    if not subject_dir.exists():
+                        continue
+                    # First, get all content in the subject directory
+                    # (this retrieves git-annex files)
+                    dataset.get(str(subject_dir))
+
+                    # Then collect all files after they've been retrieved
+                    all_files = []
+                    for file_path in subject_dir.rglob('*'):
+                        if file_path.is_file():
+                            relative_path = file_path.relative_to(dataset_path)
+                            all_files.append(str(relative_path))
+
+                    for deriv in derivatives:
+                        print(f'Getting derivative: {deriv}/sub-{sid}')
+                        deriv_dir = dataset_path / 'derivatives' / deriv / f'sub-{sid}'
+                        try:
+                            dataset.get(str(deriv_dir))
+                        except Exception as e:  # noqa: BLE001
+                            print(f'Error getting derivative {deriv}/sub-{sid}: {e}')
+                            continue
+                        for dv in deriv_dir.rglob('*'):
+                            if dv.is_file():
+                                relative_path = dv.relative_to(dataset_path)
                                 all_files.append(str(relative_path))

-                    # Copy all files to output directory
-                    for f in all_files:
-                        print(f)
-                        # Unlock the file to make it writable
-                        api.unlock(path=str(dataset_path / f), dataset=str(dataset_path))
-                        source_file = dataset_path / f
-                        relative_path = source_file.relative_to(dataset_path)
-                        target_file = Path(output_directory) / relative_path
-                        target_file.parent.mkdir(parents=True, exist_ok=True)
-                        shutil.copy2(source_file, target_file)
+                # Copy all files to output directory
+                for f in all_files:
+                    print(f)
+                    # Unlock the file to make it writable
+                    api.unlock(path=str(dataset_path / f), dataset=str(dataset_path))
+                    source_file = dataset_path / f
+                    relative_path = source_file.relative_to(dataset_path)
+                    target_file = Path(output_directory) / relative_path
+                    target_file.parent.mkdir(parents=True, exist_ok=True)
+                    shutil.copy2(source_file, target_file)

             else:
                 combined_subjects += b.get(return_type='id', target='subject')
@@ -257,7 +282,7 @@ def download_test_data(
         json.dump(create_dataset_description(), f, indent=4)

     with open(readme_path, 'w') as f:
-        f.write(create_readme_content(pet_datasets, readme_template))
+        f.write(create_readme_content(datasets_to_use, readme_template))
     combined_participants.to_csv(
         Path(output_directory) / 'participants.tsv', sep='\t', index=False
     )
@@ -273,19 +298,23 @@ def download_test_data(
     parser.add_argument(
         '--working-directory',
         '-w',
-        type=str,
         default=TemporaryDirectory(),
         help='Working directory for downloading and combining datasets,'
         'defaults to a temporary directory.',
     )
     parser.add_argument(
         '--output-directory',
         '-o',
-        type=str,
-        default=os.getcwd(),
+        default=Path.cwd(),
         help='Output directory of combined dataset,'
-        'defaults where this script is called from, presently {os.getcwd()}',
-        required=True,
+        'defaults where this script is called from, presently current working directory.',
+    )
+    parser.add_argument(
+        '--derivatives',
+        '-d',
+        nargs='+',
+        type=str,
+        help='Additional derivatives to include alongside the BIDS data.',
     )
     parser.add_argument(
         '--datasets-json',
@@ -320,4 +349,5 @@ def download_test_data(
         working_directory=args.working_directory,
         output_directory=args.output_directory,
         pet_datasets_json=args.datasets_json,  # This will be None if not provided
+        derivatives=args.derivatives,
     )
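
For anyone trying the change locally, here is a minimal usage sketch of the reworked download_test_data signature and the new derivatives option. The import path and the derivative name below are illustrative assumptions, not something this PR defines:

    # Minimal sketch, not part of the diff above: exercises the updated signature.
    # The module name `download_test_data` and the derivative 'petprep' are
    # hypothetical placeholders for illustration.
    from pathlib import Path

    from download_test_data import download_test_data  # hypothetical module name

    download_test_data(
        output_directory=Path('combined_pet_testdata'),  # where the merged dataset is written
        pet_datasets_json=None,   # falls back to DEFAULT_PET_DATASETS
        derivatives=['petprep'],  # fetches derivatives/<name>/sub-<id> for each listed subject
    )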