# NOTE: even if filename has number that is 0-padded, internally no padding
# is done
fix_accession2run = {
-    'A000005': ['^1-'],
-    'A000035': ['^8-', '^9-'],
-    'A000067': ['^9-'],
-    'A000072': ['^5-'],
-    'A000081': ['^5-'],
-    'A000082': ['^5-'],
-    'A000088': ['^9-'],
-    'A000090': ['^5-'],
-    'A000127': ['^21-'],
-    'A000130': ['^15-'],
-    'A000137': ['^9-', '^11-'],
-    'A000297': ['^12-'],
-    'A000326': ['^15-'],
-    'A000376': ['^15-'],
-    'A000384': ['^8-', '^11-'],
-    'A000467': ['^15-'],
-    'A000490': ['^15-'],
-    'A000511': ['^15-'],
-    'A000797': ['^[1-7]-'],
+    # e.g.:
+    # 'A000035': ['^8-', '^9-'],
}
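
For illustration, a minimal sketch (with hypothetical accession regexes and series_id values) of how an entry in fix_accession2run is applied by fix_canceled_runs() further down in this diff: each regex in the value list is matched against a series_id, and matching series are marked by prepending the cancelme_ prefix.

import re

# hypothetical value for one accession and hypothetical series_id values,
# mirroring the matching done in fix_canceled_runs() below
badruns = ['^8-', '^9-']
badruns_pattern = '|'.join(badruns)  # -> '^8-|^9-'
for series_id in ['5-anat-T1w', '9-func_task-rest']:
    if re.match(badruns_pattern, series_id):
        print('would mark as canceled: cancelme_' + series_id)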

-# dictionary containing fixes, keys are md5sum of study_description from
-# dicoms, in the form of PI-Experimenter^protocolname
-# values are list of tuples in the form (regex_pattern, substitution)
+# A dictionary containing fixes/remapping for sequence names per study.
+# Keys are the md5sum of study_description from DICOMs, in the form of
+# PI-Experimenter^protocolname.
+# You can use `heudiconv -f reproin --command ls --files PATH` to list
+# the "study hash".
+# Values are lists of tuples in the form (regex_pattern, substitution).
+# If the key is an empty string '', it would apply to any study.

protocols2fix = {
-    # QA
-    '43b67d9139e8c7274578b7451ab21123':
-        [
-         # ('anat-scout.*', 'anat-scout_ses-{date}'),
-         # do not change it so we retain _ses-{date}
-         # ('anat-scout.*', 'anat-scout'),
-         ('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
-         ('BOLD_p2_s4', 'func_task-rest_acq-p2-s4'),
-         ('BOLD_p2_noprescannormalize', 'func-bold_task-rest_acq-p2noprescannormalize'),
-         ('BOLD_p2', 'func-bold_task-rest_acq-p2'),
-         ('BOLD_', 'func_task-rest'),
-         ('DTI_30_p2_s4_3\.5mm', 'dwi_acq-DTI-30-p2-s4-3.5mm'),
-         ('DTI_30_p2_s4', 'dwi_acq-DTI-30-p2-s4'),
-         ('DTI_30_p2', 'dwi_acq-DTI-30-p2'),
-         ('_p2_s4_3\.5mm', '_acq-p2-s4-3.5mm'),
-         ('_p2_s4', '_acq-p2-s4'),
-         ('_p2', '_acq-p2'),
-        ],
-    '9d148e2a05f782273f6343507733309d':
-        [('anat_', 'anat-'),
-         ('run-life[0-9]', 'run+_task-life'),
-         ('scout_run\+', 'scout'),
-         ('T2w', 'T2w_run+'),
-         # substitutions for old protocol names
-         ('AAHead_Scout_32ch-head-coil', 'anat-scout'),
-         ('MPRAGE', 'anat-T1w_acq-MPRAGE_run+'),
-         ('gre_field_mapping_2mm', 'fmap_run+_acq-2mm'),
-         ('gre_field_mapping_3mm', 'fmap_run+_acq-3mm'),
-         ('epi_bold_sms_p2_s4_2mm_life1_748',
-          'func_run+_task-life_acq-2mm748'),
-         ('epi_bold_sms_p2_s4_2mm_life2_692',
-          'func_run+_task-life_acq-2mm692'),
-         ('epi_bold_sms_p2_s4_2mm_life3_754',
-          'func_run+_task-life_acq-2mm754'),
-         ('epi_bold_sms_p2_s4_2mm_life4_824',
-          'func_run+_task-life_acq-2mm824'),
-         ('epi_bold_p2_3mm_nofs_life1_374',
-          'func_run+_task-life_acq-3mmnofs374'),
-         ('epi_bold_p2_3mm_nofs_life2_346',
-          'func_run+_task-life_acq-3mmnofs346'),
-         ('epi_bold_p2_3mm_nofs_life3_377',
-          'func_run+_task-life_acq-3mmnofs377'),
-         ('epi_bold_p2_3mm_nofs_life4_412',
-          'func_run+_task-life_acq-3mmnofs412'),
-         ('t2_space_sag_p4_iso', 'anat-T2w_run+'),
-         ('gre_field_mapping_2.4mm', 'fmap_run+_acq-2.4mm'),
-         ('rest_p2_sms4_2.4mm_64sl_1000tr_32te_600dyn',
-          'func_run+_task-rest_acq-2.4mm64sl1000tr32te600dyn'),
-         ('DTI_30', 'dwi_run+_acq-30'),
-         ('t1_space_sag_p2_iso', 'anat-T1w_acq-060mm_run+')],
-    '76b36c80231b0afaf509e2d52046e964':
-        [('fmap_run\+_2mm', 'fmap_run+_acq-2mm')],
-    'c6d8fbccc72990bee61d28e73b2618a4':
-        [('run=', 'run+')],
-    'a751cc977f1e354fcafcb0ea2de123bd':
-        [
-         ('_unlabeled', '_task-unlabeled'),
-         ('_mSense', '_acq-mSense'),
-         ('_p1_sms4_2.5mm', '_acq-p1-sms4-2.5mm'),
-         ('_p1_sms4_3mm', '_acq-p1-sms4-3mm'),
-        ],
-    'd160113cf5ea8c5d0cbbbe14ef625e76':
-        [
-         ('_run0', '_run-0'),
-        ],
-    '1bd62e10672fe0b435a9aa8d75b45425':
-        [
-         # need to add incrementing session -- study should have 2
-         # and no need for run+ for the scout!
-         ('scout(_run\+)?$', 'scout_ses+'),
-        ],
-    'da218a66de902adb3ad9407d514e3639':
-        [
-         # those sequences renamed later to include DTI- in their acq-
-         # so fot consistency
-         ('hardi_64', 'dwi_acq-DTI-hardi64'),
-         ('acq-hardi', 'acq-DTI-hardi'),
-        ],
-    'ed20c1ad4a0861b2b65768e159258eec':
-        [
-         ('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
-         ('_test', ''),
-        ],
-    '1996f745c30c1df1d3851844e56d294f':
-        [
-         ('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
-        ],
-    # '022969bfde39c2940c114edf1db3fabc':
-    # [ # should be applied only for ses-03!
-    #     ('_acq-MPRAGE_ses-02', '_acq-MPRAGE_ses-03'),
-    # ],
-    # to be used only once for one interrupted accession but we cannot
-    # fix per accession yet
-    # '23763823d2b9b4b09dafcadc8e8edf21':
-    # [
-    #     ('anat-T1w_acq-MPRAGE', 'anat-T1w_acq-MPRAGE_run-06'),
-    #     ('anat_T2w', 'anat_T2w_run-06'),
-    #     ('fmap_acq-3mm', 'fmap_acq-3mm_run-06'),
-    # ],
+    # e.g., QA:
+    # '43b67d9139e8c7274578b7451ab21123':
+    #     [
+    #      ('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
+    #      ('BOLD_', 'func_task-rest'),
+    #      ('_p2_s4', '_acq-p2-s4'),
+    #      ('_p2', '_acq-p2'),
+    #     ],
+    # '':  # for any study example with regexes used
+    #     [
+    #      ('AAHead_Scout_.*', 'anat-scout'),
+    #      ('^dti_.*', 'dwi'),
+    #      ('^.*_distortion_corr.*_([ap]+)_([12])', r'fmap-epi_dir-\1_run-\2'),
+    #      ('^(.+)_ap.*_r(0[0-9])', r'func_task-\1_run-\2'),
+    #      ('^t1w_.*', 'anat-T1w'),
+    #      # problematic case -- multiple identically named pepolar fieldmap runs
+    #      # I guess we will just sacrifice ability to detect canceled runs here.
+    #      # And we cannot just use _run+ since it would increment independently
+    #      # for ap and then for pa. We will rely on having ap preceding pa.
+    #      # Added _acq-mb8 so they match the one in funcs
+    #      ('func_task-discorr_acq-ap', r'fmap-epi_dir-ap_acq-mb8_run+'),
+    #      ('func_task-discorr_acq-pa', r'fmap-epi_dir-pa_acq-mb8_run='),
+    #     ]
}
-# there was also screw up in the locator specification
-# so we need to fix in both
-# protocols2fix['67ae5e641ea9d487b6fdf56fb91aeb93'] = protocols2fix['022969bfde39c2940c114edf1db3fabc']
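
The keys of protocols2fix are the "study hash" of a session. The comment above recommends `heudiconv -f reproin --command ls --files PATH` to print it directly; as a rough sketch, assuming the hash is simply the md5 hex digest of the study_description string (PI-Experimenter^protocolname), it could also be computed like this (the study name below is hypothetical):

import hashlib

def study_hash(study_description):
    # md5 hex digest of the study_description string; the digest is what
    # would serve as a key in protocols2fix
    return hashlib.md5(study_description.encode('utf-8')).hexdigest()

print(study_hash('Halchenko^Smith^bids_test'))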

# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
dicoms2skip = [
-    '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
-    '1.3.12.2.1107.5.2.43.66112.30000016102813152550600000004',  # double scout
+    # e.g.
+    # '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
]
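
A minimal sketch of how dicoms2skip could be consulted by the filter_dicom hook referenced further down in this diff, assuming dcmdata is a pydicom Dataset exposing StudyInstanceUID and that returning True means the file is filtered out:

def filter_dicom(dcmdata):
    """Return True if this DICOM should be skipped (not converted)."""
    # skip any file whose StudyInstanceUID is listed in dicoms2skip above
    return getattr(dcmdata, 'StudyInstanceUID', None) in dicoms2skip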

DEFAULT_FIELDS = {
    # Let it just be in each json file extracted
-    # 'Manufacturer': "Siemens",
-    # 'ManufacturersModelName': "Prisma",
    "Acknowledgements":
        "We thank Terry Sacket and the rest of the DBIC (Dartmouth Brain Imaging "
        "Center) personnel for assistance in data collection, and "
-        "Yaroslav Halchenko and Matteo Visconti for preparing BIDS dataset. "
-        "TODO: more ",
+        "Yaroslav O. Halchenko for preparing BIDS dataset. "
+        "TODO: adjust to your case.",
}
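
The comment inside DEFAULT_FIELDS suggests these values should end up in each extracted JSON sidecar. This is not heudiconv's actual machinery, just a hypothetical illustration of that intent, merging the defaults into a sidecar while keeping values already present:

import json

def add_default_fields(json_path, defaults=DEFAULT_FIELDS):
    # hypothetical helper: load a sidecar, fill in missing default fields,
    # and write it back; existing sidecar values take precedence
    with open(json_path) as f:
        sidecar = json.load(f)
    merged = dict(defaults)
    merged.update(sidecar)
    with open(json_path, 'w') as f:
        json.dump(merged, f, indent=2)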


@@ -311,38 +216,10 @@ def filter_dicom(dcmdata):

def filter_files(fn):
    """Return True if a file should be kept, else False.
-    We're using it to filter out files that do not start with a number."""

-    # do not check for these accession numbers because they haven't been
-    # recopied with the initial number
-    donotfilter = ['A000012', 'A000013', 'A000020', 'A000041']
-
-    split = os.path.split(fn)
-    split2 = os.path.split(split[0])
-    sequence_dir = split2[1]
-    split3 = os.path.split(split2[0])
-    accession_number = split3[1]
+    ATM reproin does not do any filtering. Override if you need to add some.
+    """
    return True
-    if accession_number == 'A000043':
-        # crazy one that got copied for some runs but not for others,
-        # so we are going to discard those that got copied and let heudiconv
-        # figure out the rest
-        return False if re.match('^[0-9]+-', sequence_dir) else True
-    elif accession_number == 'unknown':
-        # this one had some stuff without study description, filter stuff before
-        # collecting info, so it doesn't crash completely
-        return False if re.match('^[34][07-9]-sn', sequence_dir) else True
-    elif accession_number in donotfilter:
-        return True
-    elif accession_number.startswith('phantom-'):
-        # Accessions on phantoms, e.g. in dartmouth-phantoms/bids_test4-20161014
-        return True
-    elif accession_number.startswith('heudiconvdcm'):
-        # we were given some tarball with dicoms which was extracted so we
-        # better obey
-        return True
-    else:
-        return True if re.match('^[0-9]+-', sequence_dir) else False
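
If site-specific filtering were ever needed again, an override could reuse the core pattern of the removed DBIC logic above; a minimal sketch, assuming the same .../accession/sequence_dir/file path layout:

import os
import re

def filter_files(fn):
    """Keep only files whose sequence directory name starts with a number."""
    # hypothetical override; the shipped heuristic above simply returns True
    sequence_dir = os.path.basename(os.path.dirname(fn))
    return bool(re.match('^[0-9]+-', sequence_dir))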


def create_key(subdir, file_suffix, outtype=('nii.gz', 'dicom'),
@@ -381,13 +258,17 @@ def get_study_hash(seqinfo):
def fix_canceled_runs(seqinfo):
    """Function that adds cancelme_ to known bad runs which were forgotten
    """
-    accession_number = get_unique(seqinfo, 'accession_number')
-    if accession_number in fix_accession2run:
-        lgr.info("Considering some runs possibly marked to be "
-                 "canceled for accession %s", accession_number)
-        badruns = fix_accession2run[accession_number]
-        badruns_pattern = '|'.join(badruns)
-        for i, s in enumerate(seqinfo):
+    if not fix_accession2run:
+        return seqinfo  # nothing to do
+    for i, s in enumerate(seqinfo):
+        accession_number = getattr(s, 'accession_number')
+        if accession_number and accession_number in fix_accession2run:
+            lgr.info("Considering some runs possibly marked to be "
+                     "canceled for accession %s", accession_number)
+            # This code is reminiscent of prior logic when operating on
+            # a single accession, but left as is for now
+            badruns = fix_accession2run[accession_number]
+            badruns_pattern = '|'.join(badruns)
            if re.match(badruns_pattern, s.series_id):
                lgr.info('Fixing bad run {0}'.format(s.series_id))
                fixedkwargs = dict()