 def create_dataset_description():
     """Create BIDS dataset_description.json content."""
     return {
-        "Name": "PETPrep Test Data Collection",
-        "BIDSVersion": "1.7.0",
-        "DatasetType": "raw",
-        "License": "CC0",
-        "Authors": ["datalad", "python", "make", "openneuro"],
-        "HowToAcknowledge": "Please cite the original datasets and PETPrep software.",
-        "Funding": [
-            "This test data collection was created for PETPrep development and testing purposes"
+        'Name': 'PETPrep Test Data Collection',
+        'BIDSVersion': '1.7.0',
+        'DatasetType': 'raw',
+        'License': 'CC0',
+        'Authors': ['datalad', 'python', 'make', 'openneuro'],
+        'HowToAcknowledge': 'Please cite the original datasets and PETPrep software.',
+        'Funding': [
+            'This test data collection was created for PETPrep development and testing purposes'
         ],
-        "EthicsApprovals": [
-            "This is a test dataset compiled from publicly available BIDS datasets for software testing purposes"
+        'EthicsApprovals': [
+            'This is a test dataset compiled from publicly available BIDS datasets for software testing purposes'
         ],
-        "ReferencesAndLinks": [
-            "https://github.com/nipreps/petprep",
-            "https://openneuro.org",
+        'ReferencesAndLinks': [
+            'https://github.com/nipreps/petprep',
+            'https://openneuro.org',
         ],
-        "DatasetDOI": "10.18112/openneuro.ds000000.v1.0.0",
-        "HEDVersion": "8.0.0",
+        'DatasetDOI': '10.18112/openneuro.ds000000.v1.0.0',
+        'HEDVersion': '8.0.0',
     }

@@ -106,36 +106,36 @@ def create_readme_content(pet_datasets, readme_template):
     """Create README content dynamically based on the datasets."""

     # Generate dataset list dynamically
-    dataset_list = ""
+    dataset_list = ''
     for i, (dataset_id, meta) in enumerate(pet_datasets.items(), 1):
-        dataset_list += f"{i}. **{dataset_id}**: {meta['description']}\n"
+        dataset_list += f'{i}. **{dataset_id}**: {meta["description"]}\n'

     return readme_template.format(dataset_list=dataset_list)


 pet_datasets = {
-    "ds005619": {
-        "version": "1.1.0",
-        "description": "[18F]SF51, a Novel 18F-labeled PET Radioligand for "
-        "Translocator Protein 18kDa (TSPO) in Brain, Works Well "
-        "in Monkeys but Fails in Humans",
-        "subject_ids": ["sf02"],
+    'ds005619': {
+        'version': '1.1.0',
+        'description': '[18F]SF51, a Novel 18F-labeled PET Radioligand for '
+        'Translocator Protein 18kDa (TSPO) in Brain, Works Well '
+        'in Monkeys but Fails in Humans',
+        'subject_ids': ['sf02'],
     },
-    "ds004868": {
-        "version": "1.0.4",
-        "description": "[11C]PS13 demonstrates pharmacologically selective and "
-        "substantial binding to cyclooxygenase-1 (COX-1) in the "
-        "human brain",
-        "subject_ids": ["PSBB01"],
+    'ds004868': {
+        'version': '1.0.4',
+        'description': '[11C]PS13 demonstrates pharmacologically selective and '
+        'substantial binding to cyclooxygenase-1 (COX-1) in the '
+        'human brain',
+        'subject_ids': ['PSBB01'],
     },
-    "ds004869": {
-        "version": "1.1.1",
-        "description": "https://openneuro.org/datasets/ds004869/versions/1.1.1",
-        "subject_ids": ["01"],
+    'ds004869': {
+        'version': '1.1.1',
+        'description': 'https://openneuro.org/datasets/ds004869/versions/1.1.1',
+        'subject_ids': ['01'],
     },
 }

-openneuro_template_string = "https://github.com/OpenNeuroDatasets/{DATASET_ID}.git"
+openneuro_template_string = 'https://github.com/OpenNeuroDatasets/{DATASET_ID}.git'


 def download_test_data(
@@ -148,14 +148,17 @@ def download_test_data(
         datasets_to_use = pet_datasets  # Use the default defined at module level
     else:
         # Load from JSON file
-        with open(pet_datasets_json, "r") as infile:
+        with open(pet_datasets_json, 'r') as infile:
             datasets_to_use = json.load(infile)
-
+
     with working_directory as data_path:
         combined_participants_tsv = pd.DataFrame()
         combined_subjects = []
         combined_dataset_files = []
-        for dataset_id, meta in datasets_to_use.items():  # Use datasets_to_use instead of pet_datasets
+        for (
+            dataset_id,
+            meta,
+        ) in datasets_to_use.items():  # Use datasets_to_use instead of pet_datasets
             dataset_path = Path(data_path) / Path(dataset_id)
             if dataset_path.is_dir() and len(sys.argv) <= 1:
                 dataset_path.rmdir()
@@ -172,100 +175,96 @@ def download_test_data(
         )  # when petderivatives are a thing, we'll think about using pybids to get them

         # Access participants.tsv
-        participants_files = b.get(
-            suffix="participants", extension=".tsv", return_type="file"
-        )
+        participants_files = b.get(suffix='participants', extension='.tsv', return_type='file')
         if participants_files:
             participants_file = participants_files[0]

             # Read participants.tsv as pandas DataFrame
-            participants_df = pd.read_csv(participants_file, sep="\t")
+            participants_df = pd.read_csv(participants_file, sep='\t')

             # Combine with overall participants DataFrame
             combined_participants_tsv = pd.concat(
                 [combined_participants_tsv, participants_df], ignore_index=True
             )
         # if a subset of subjects are specified collect only those subjects in the install
-        if meta.get("subject_ids", []) != []:
-            for id in meta["subject_ids"]:
+        if meta.get('subject_ids', []) != []:
+            for id in meta['subject_ids']:
                 combined_subjects.append(id)
                 # Get the entire subject directory content including git-annex files
-                subject_dir = dataset_path / f"sub-{id}"
+                subject_dir = dataset_path / f'sub-{id}'
                 if subject_dir.exists():
                     # First, get all content in the subject directory (this retrieves git-annex files)
                     result = dataset.get(str(subject_dir))
-
+
                     # Then collect all files after they've been retrieved
                     all_files = []
-                    for file_path in subject_dir.rglob("*"):
+                    for file_path in subject_dir.rglob('*'):
                         if file_path.is_file():
                             relative_path = file_path.relative_to(dataset_path)
                             all_files.append(str(relative_path))
-
+
                     # Copy all files to output directory
                     for f in all_files:
                         print(f)
                         # Unlock the file to make it writable
-                        api.unlock(
-                            path=str(dataset_path / f), dataset=str(dataset_path)
-                        )
+                        api.unlock(path=str(dataset_path / f), dataset=str(dataset_path))
                         source_file = dataset_path / f
                         relative_path = source_file.relative_to(dataset_path)
                         target_file = Path(output_directory) / relative_path
                         target_file.parent.mkdir(parents=True, exist_ok=True)
                         shutil.copy2(source_file, target_file)

         else:
-            combined_subjects += b.get(return_type="id", target="subject")
+            combined_subjects += b.get(return_type='id', target='subject')
             # Get all files first
             dataset.get(dataset_path)
             api.unlock(path=str(dataset_path), dataset=dataset)
             shutil.copytree(dataset_path, output_directory)

-        combined_subjects = [f"sub-{s}" for s in combined_subjects]
+        combined_subjects = [f'sub-{s}' for s in combined_subjects]

         # Filter participants DataFrame to keep only subjects in combined_subjects list
         combined_participants = combined_participants_tsv[
-            combined_participants_tsv["participant_id"].isin(combined_subjects)
+            combined_participants_tsv['participant_id'].isin(combined_subjects)
         ]

         # Only write files if a specific download path was provided
-        dataset_desc_path = Path(output_directory) / "dataset_description.json"
-        readme_path = Path(output_directory) / "README.md"
+        dataset_desc_path = Path(output_directory) / 'dataset_description.json'
+        readme_path = Path(output_directory) / 'README.md'

-        with open(dataset_desc_path, "w") as f:
+        with open(dataset_desc_path, 'w') as f:
             json.dump(create_dataset_description(), f, indent=4)

-        with open(readme_path, "w") as f:
+        with open(readme_path, 'w') as f:
             f.write(create_readme_content(pet_datasets, readme_template))
         combined_participants.to_csv(
-            Path(output_directory) / "participants.tsv", sep="\t", index=False
+            Path(output_directory) / 'participants.tsv', sep='\t', index=False
         )


-if __name__ == "__main__":
+if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-        prog="PETPrepTestDataCollector",
-        description="Collects PET datasets from OpenNeuro.org and combines them into a single BIDS dataset using datalad and pandas",
+        prog='PETPrepTestDataCollector',
+        description='Collects PET datasets from OpenNeuro.org and combines them into a single BIDS dataset using datalad and pandas',
         formatter_class=argparse.RawTextHelpFormatter,
     )
     parser.add_argument(
-        "--working-directory",
-        "-w",
+        '--working-directory',
+        '-w',
         type=str,
         default=TemporaryDirectory(),
-        help="Working directory for downloading and combining datasets, defaults to a temporary directory.",
+        help='Working directory for downloading and combining datasets, defaults to a temporary directory.',
     )
     parser.add_argument(
-        "--output-directory",
-        "-o",
+        '--output-directory',
+        '-o',
         type=str,
         default=os.getcwd(),
-        help=f"Output directory of combined dataset, defaults where this script is called from, presently {os.getcwd()}",
+        help=f'Output directory of combined dataset, defaults where this script is called from, presently {os.getcwd()}',
     )
     parser.add_argument(
-        "--datasets-json",
-        "-j",
+        '--datasets-json',
+        '-j',
         type=str,
         default=None,
         help="""Use a custom json of datasets along
@@ -293,7 +292,7 @@ def download_test_data(
     args = parser.parse_args()

     download_test_data(
-        working_directory=args.working_directory,
+        working_directory=args.working_directory,
         output_directory=args.output_directory,
-        pet_datasets_json=args.datasets_json  # This will be None if not provided
+        pet_datasets_json=args.datasets_json,  # This will be None if not provided
     )
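
For reference, here is a minimal sketch of a custom datasets file for the -j/--datasets-json option introduced in this diff. The file name, the dataset entry, the description text, and the script name in the invocation are illustrative assumptions; the structure simply mirrors the module-level pet_datasets mapping that json.load replaces.

# Hypothetical custom datasets file for -j/--datasets-json.
# Structure mirrors the module-level pet_datasets mapping:
# dataset ID -> {version, description, subject_ids}.
import json

custom_datasets = {
    'ds004868': {
        'version': '1.0.4',
        'description': '[11C]PS13 COX-1 dataset, single-subject subset for testing',
        'subject_ids': ['PSBB01'],
    },
}

with open('my_datasets.json', 'w') as outfile:
    json.dump(custom_datasets, outfile, indent=4)

# Then, assuming the script is saved as collect_test_data.py:
#   python collect_test_data.py -o /tmp/pet_test_data -j my_datasets.json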