
Commit 2320469

bendhousearteffigies authored and committed
reformatted with ruff
1 parent 1366b4c commit 2320469
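
The diff below touches only formatting: double-quoted strings become single-quoted and a few calls are rewrapped. A plausible way to reproduce the change, assuming the project sets quote-style = "single" under [tool.ruff.format] in its pyproject.toml (that configuration is not part of this commit):

    ruff format scripts/collect_test_data.py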

File tree

1 file changed: +70 −71 lines

scripts/collect_test_data.py

Lines changed: 70 additions & 71 deletions
@@ -80,24 +80,24 @@
 def create_dataset_description():
     """Create BIDS dataset_description.json content."""
     return {
-        "Name": "PETPrep Test Data Collection",
-        "BIDSVersion": "1.7.0",
-        "DatasetType": "raw",
-        "License": "CC0",
-        "Authors": ["datalad", "python", "make", "openneuro"],
-        "HowToAcknowledge": "Please cite the original datasets and PETPrep software.",
-        "Funding": [
-            "This test data collection was created for PETPrep development and testing purposes"
+        'Name': 'PETPrep Test Data Collection',
+        'BIDSVersion': '1.7.0',
+        'DatasetType': 'raw',
+        'License': 'CC0',
+        'Authors': ['datalad', 'python', 'make', 'openneuro'],
+        'HowToAcknowledge': 'Please cite the original datasets and PETPrep software.',
+        'Funding': [
+            'This test data collection was created for PETPrep development and testing purposes'
         ],
-        "EthicsApprovals": [
-            "This is a test dataset compiled from publicly available BIDS datasets for software testing purposes"
+        'EthicsApprovals': [
+            'This is a test dataset compiled from publicly available BIDS datasets for software testing purposes'
         ],
-        "ReferencesAndLinks": [
-            "https://github.com/nipreps/petprep",
-            "https://openneuro.org",
+        'ReferencesAndLinks': [
+            'https://github.com/nipreps/petprep',
+            'https://openneuro.org',
         ],
-        "DatasetDOI": "10.18112/openneuro.ds000000.v1.0.0",
-        "HEDVersion": "8.0.0",
+        'DatasetDOI': '10.18112/openneuro.ds000000.v1.0.0',
+        'HEDVersion': '8.0.0',
     }


@@ -106,36 +106,36 @@ def create_readme_content(pet_datasets, readme_template):
     """Create README content dynamically based on the datasets."""

     # Generate dataset list dynamically
-    dataset_list = ""
+    dataset_list = ''
     for i, (dataset_id, meta) in enumerate(pet_datasets.items(), 1):
-        dataset_list += f"{i}. **{dataset_id}**: {meta['description']}\n"
+        dataset_list += f'{i}. **{dataset_id}**: {meta["description"]}\n'

     return readme_template.format(dataset_list=dataset_list)


 pet_datasets = {
-    "ds005619": {
-        "version": "1.1.0",
-        "description": "[18F]SF51, a Novel 18F-labeled PET Radioligand for "
-        "Translocator Protein 18kDa (TSPO) in Brain, Works Well "
-        "in Monkeys but Fails in Humans",
-        "subject_ids": ["sf02"],
+    'ds005619': {
+        'version': '1.1.0',
+        'description': '[18F]SF51, a Novel 18F-labeled PET Radioligand for '
+        'Translocator Protein 18kDa (TSPO) in Brain, Works Well '
+        'in Monkeys but Fails in Humans',
+        'subject_ids': ['sf02'],
     },
-    "ds004868": {
-        "version": "1.0.4",
-        "description": "[11C]PS13 demonstrates pharmacologically selective and "
-        "substantial binding to cyclooxygenase-1 (COX-1) in the "
-        "human brain",
-        "subject_ids": ["PSBB01"],
+    'ds004868': {
+        'version': '1.0.4',
+        'description': '[11C]PS13 demonstrates pharmacologically selective and '
+        'substantial binding to cyclooxygenase-1 (COX-1) in the '
+        'human brain',
+        'subject_ids': ['PSBB01'],
     },
-    "ds004869": {
-        "version": "1.1.1",
-        "description": "https://openneuro.org/datasets/ds004869/versions/1.1.1",
-        "subject_ids": ["01"],
+    'ds004869': {
+        'version': '1.1.1',
+        'description': 'https://openneuro.org/datasets/ds004869/versions/1.1.1',
+        'subject_ids': ['01'],
     },
 }

-openneuro_template_string = "https://github.com/OpenNeuroDatasets/{DATASET_ID}.git"
+openneuro_template_string = 'https://github.com/OpenNeuroDatasets/{DATASET_ID}.git'


 def download_test_data(
@@ -148,14 +148,17 @@ def download_test_data(
         datasets_to_use = pet_datasets  # Use the default defined at module level
     else:
         # Load from JSON file
-        with open(pet_datasets_json, "r") as infile:
+        with open(pet_datasets_json, 'r') as infile:
             datasets_to_use = json.load(infile)
-
+
     with working_directory as data_path:
         combined_participants_tsv = pd.DataFrame()
         combined_subjects = []
         combined_dataset_files = []
-        for dataset_id, meta in datasets_to_use.items():  # Use datasets_to_use instead of pet_datasets
+        for (
+            dataset_id,
+            meta,
+        ) in datasets_to_use.items():  # Use datasets_to_use instead of pet_datasets
             dataset_path = Path(data_path) / Path(dataset_id)
             if dataset_path.is_dir() and len(sys.argv) <= 1:
                 dataset_path.rmdir()
@@ -172,100 +175,96 @@ def download_test_data(
             )  # when petderivatives are a thing, we'll think about using pybids to get them

             # Access participants.tsv
-            participants_files = b.get(
-                suffix="participants", extension=".tsv", return_type="file"
-            )
+            participants_files = b.get(suffix='participants', extension='.tsv', return_type='file')
             if participants_files:
                 participants_file = participants_files[0]

                 # Read participants.tsv as pandas DataFrame
-                participants_df = pd.read_csv(participants_file, sep="\t")
+                participants_df = pd.read_csv(participants_file, sep='\t')

                 # Combine with overall participants DataFrame
                 combined_participants_tsv = pd.concat(
                     [combined_participants_tsv, participants_df], ignore_index=True
                 )
             # if a subset of subjects are specified collect only those subjects in the install
-            if meta.get("subject_ids", []) != []:
-                for id in meta["subject_ids"]:
+            if meta.get('subject_ids', []) != []:
+                for id in meta['subject_ids']:
                     combined_subjects.append(id)
                     # Get the entire subject directory content including git-annex files
-                    subject_dir = dataset_path / f"sub-{id}"
+                    subject_dir = dataset_path / f'sub-{id}'
                     if subject_dir.exists():
                         # First, get all content in the subject directory (this retrieves git-annex files)
                         result = dataset.get(str(subject_dir))
-
+
                         # Then collect all files after they've been retrieved
                         all_files = []
-                        for file_path in subject_dir.rglob("*"):
+                        for file_path in subject_dir.rglob('*'):
                             if file_path.is_file():
                                 relative_path = file_path.relative_to(dataset_path)
                                 all_files.append(str(relative_path))
-
+
                         # Copy all files to output directory
                         for f in all_files:
                             print(f)
                             # Unlock the file to make it writable
-                            api.unlock(
-                                path=str(dataset_path / f), dataset=str(dataset_path)
-                            )
+                            api.unlock(path=str(dataset_path / f), dataset=str(dataset_path))
                             source_file = dataset_path / f
                             relative_path = source_file.relative_to(dataset_path)
                             target_file = Path(output_directory) / relative_path
                             target_file.parent.mkdir(parents=True, exist_ok=True)
                             shutil.copy2(source_file, target_file)

             else:
-                combined_subjects += b.get(return_type="id", target="subject")
+                combined_subjects += b.get(return_type='id', target='subject')
                 # Get all files first
                 dataset.get(dataset_path)
                 api.unlock(path=str(dataset_path), dataset=dataset)
                 shutil.copytree(dataset_path, output_directory)

-        combined_subjects = [f"sub-{s}" for s in combined_subjects]
+        combined_subjects = [f'sub-{s}' for s in combined_subjects]

         # Filter participants DataFrame to keep only subjects in combined_subjects list
         combined_participants = combined_participants_tsv[
-            combined_participants_tsv["participant_id"].isin(combined_subjects)
+            combined_participants_tsv['participant_id'].isin(combined_subjects)
         ]

         # Only write files if a specific download path was provided
-        dataset_desc_path = Path(output_directory) / "dataset_description.json"
-        readme_path = Path(output_directory) / "README.md"
+        dataset_desc_path = Path(output_directory) / 'dataset_description.json'
+        readme_path = Path(output_directory) / 'README.md'

-        with open(dataset_desc_path, "w") as f:
+        with open(dataset_desc_path, 'w') as f:
             json.dump(create_dataset_description(), f, indent=4)

-        with open(readme_path, "w") as f:
+        with open(readme_path, 'w') as f:
             f.write(create_readme_content(pet_datasets, readme_template))
         combined_participants.to_csv(
-            Path(output_directory) / "participants.tsv", sep="\t", index=False
+            Path(output_directory) / 'participants.tsv', sep='\t', index=False
         )


-if __name__ == "__main__":
+if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-        prog="PETPrepTestDataCollector",
-        description="Collects PET datasets from OpenNeuro.org and combines them into a single BIDS dataset using datalad and pandas",
+        prog='PETPrepTestDataCollector',
+        description='Collects PET datasets from OpenNeuro.org and combines them into a single BIDS dataset using datalad and pandas',
         formatter_class=argparse.RawTextHelpFormatter,
     )
     parser.add_argument(
-        "--working-directory",
-        "-w",
+        '--working-directory',
+        '-w',
         type=str,
         default=TemporaryDirectory(),
-        help="Working directory for downloading and combining datasets, defaults to a temporary directory.",
+        help='Working directory for downloading and combining datasets, defaults to a temporary directory.',
     )
     parser.add_argument(
-        "--output-directory",
-        "-o",
+        '--output-directory',
+        '-o',
         type=str,
         default=os.getcwd(),
-        help=f"Output directory of combined dataset, defaults where this script is called from, presently {os.getcwd()}",
+        help=f'Output directory of combined dataset, defaults where this script is called from, presently {os.getcwd()}',
    )
     parser.add_argument(
-        "--datasets-json",
-        "-j",
+        '--datasets-json',
+        '-j',
         type=str,
         default=None,
         help="""Use a custom json of datasets along
@@ -293,7 +292,7 @@ def download_test_data(
     args = parser.parse_args()

     download_test_data(
-        working_directory=args.working_directory,
+        working_directory=args.working_directory,
         output_directory=args.output_directory,
-        pet_datasets_json=args.datasets_json  # This will be None if not provided
+        pet_datasets_json=args.datasets_json,  # This will be None if not provided
     )
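
For reference, running the reformatted script with no flags should be equivalent to the direct call below, a minimal sketch built from the argparse defaults visible in the diff. The import path is an assumption for illustration; note also that --datasets-json, when given, expects a JSON file whose structure mirrors the module-level pet_datasets mapping.

    # Minimal sketch mirroring the CLI defaults shown above.
    import os
    from tempfile import TemporaryDirectory

    from collect_test_data import download_test_data  # assumed import path

    download_test_data(
        working_directory=TemporaryDirectory(),  # default for --working-directory / -w
        output_directory=os.getcwd(),  # default for --output-directory / -o
        pet_datasets_json=None,  # default for --datasets-json / -j; falls back to pet_datasets
    )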
