Skip to content

Commit f44c099

Browse files
authored
Merge pull request #508 from dbic/bf-reproin-multiaccession
BF+RF: reproin - allow multiple accessions + remove DBIC specific specs for elderly/pilot studies
2 parents 8866cc5 + 1961f0f commit f44c099

File tree

1 file changed

+48
-167
lines changed

1 file changed

+48
-167
lines changed

heudiconv/heuristics/reproin.py

Lines changed: 48 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -148,150 +148,55 @@
148148
# NOTE: even if filename has number that is 0-padded, internally no padding
149149
# is done
150150
fix_accession2run = {
151-
'A000005': ['^1-'],
152-
'A000035': ['^8-', '^9-'],
153-
'A000067': ['^9-'],
154-
'A000072': ['^5-'],
155-
'A000081': ['^5-'],
156-
'A000082': ['^5-'],
157-
'A000088': ['^9-'],
158-
'A000090': ['^5-'],
159-
'A000127': ['^21-'],
160-
'A000130': ['^15-'],
161-
'A000137': ['^9-', '^11-'],
162-
'A000297': ['^12-'],
163-
'A000326': ['^15-'],
164-
'A000376': ['^15-'],
165-
'A000384': ['^8-', '^11-'],
166-
'A000467': ['^15-'],
167-
'A000490': ['^15-'],
168-
'A000511': ['^15-'],
169-
'A000797': ['^[1-7]-'],
151+
# e.g.:
152+
# 'A000035': ['^8-', '^9-'],
170153
}
171154

172-
# dictionary containing fixes, keys are md5sum of study_description from
173-
# dicoms, in the form of PI-Experimenter^protocolname
174-
# values are list of tuples in the form (regex_pattern, substitution)
155+
# A dictionary containing fixes/remapping for sequence names per study.
156+
# Keys are md5sum of study_description from DICOMs, in the form of PI-Experimenter^protocolname
157+
# You can use `heudiconv -f reproin --command ls --files PATH`
158+
# to list the "study hash".
159+
# Values are list of tuples in the form (regex_pattern, substitution).
160+
# If the key is an empty string (`''`), it applies to any study.
175161
protocols2fix = {
176-
# QA
177-
'43b67d9139e8c7274578b7451ab21123':
178-
[
179-
# ('anat-scout.*', 'anat-scout_ses-{date}'),
180-
# do not change it so we retain _ses-{date}
181-
# ('anat-scout.*', 'anat-scout'),
182-
('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
183-
('BOLD_p2_s4', 'func_task-rest_acq-p2-s4'),
184-
('BOLD_p2_noprescannormalize', 'func-bold_task-rest_acq-p2noprescannormalize'),
185-
('BOLD_p2', 'func-bold_task-rest_acq-p2'),
186-
('BOLD_', 'func_task-rest'),
187-
('DTI_30_p2_s4_3\.5mm', 'dwi_acq-DTI-30-p2-s4-3.5mm'),
188-
('DTI_30_p2_s4', 'dwi_acq-DTI-30-p2-s4'),
189-
('DTI_30_p2', 'dwi_acq-DTI-30-p2'),
190-
('_p2_s4_3\.5mm', '_acq-p2-s4-3.5mm'),
191-
('_p2_s4', '_acq-p2-s4'),
192-
('_p2', '_acq-p2'),
193-
],
194-
'9d148e2a05f782273f6343507733309d':
195-
[('anat_', 'anat-'),
196-
('run-life[0-9]', 'run+_task-life'),
197-
('scout_run\+', 'scout'),
198-
('T2w', 'T2w_run+'),
199-
# substitutions for old protocol names
200-
('AAHead_Scout_32ch-head-coil', 'anat-scout'),
201-
('MPRAGE', 'anat-T1w_acq-MPRAGE_run+'),
202-
('gre_field_mapping_2mm', 'fmap_run+_acq-2mm'),
203-
('gre_field_mapping_3mm', 'fmap_run+_acq-3mm'),
204-
('epi_bold_sms_p2_s4_2mm_life1_748',
205-
'func_run+_task-life_acq-2mm748'),
206-
('epi_bold_sms_p2_s4_2mm_life2_692',
207-
'func_run+_task-life_acq-2mm692'),
208-
('epi_bold_sms_p2_s4_2mm_life3_754',
209-
'func_run+_task-life_acq-2mm754'),
210-
('epi_bold_sms_p2_s4_2mm_life4_824',
211-
'func_run+_task-life_acq-2mm824'),
212-
('epi_bold_p2_3mm_nofs_life1_374',
213-
'func_run+_task-life_acq-3mmnofs374'),
214-
('epi_bold_p2_3mm_nofs_life2_346',
215-
'func_run+_task-life_acq-3mmnofs346'),
216-
('epi_bold_p2_3mm_nofs_life3_377',
217-
'func_run+_task-life_acq-3mmnofs377'),
218-
('epi_bold_p2_3mm_nofs_life4_412',
219-
'func_run+_task-life_acq-3mmnofs412'),
220-
('t2_space_sag_p4_iso', 'anat-T2w_run+'),
221-
('gre_field_mapping_2.4mm', 'fmap_run+_acq-2.4mm'),
222-
('rest_p2_sms4_2.4mm_64sl_1000tr_32te_600dyn',
223-
'func_run+_task-rest_acq-2.4mm64sl1000tr32te600dyn'),
224-
('DTI_30', 'dwi_run+_acq-30'),
225-
('t1_space_sag_p2_iso', 'anat-T1w_acq-060mm_run+')],
226-
'76b36c80231b0afaf509e2d52046e964':
227-
[('fmap_run\+_2mm', 'fmap_run+_acq-2mm')],
228-
'c6d8fbccc72990bee61d28e73b2618a4':
229-
[('run=', 'run+')],
230-
'a751cc977f1e354fcafcb0ea2de123bd':
231-
[
232-
('_unlabeled', '_task-unlabeled'),
233-
('_mSense', '_acq-mSense'),
234-
('_p1_sms4_2.5mm', '_acq-p1-sms4-2.5mm'),
235-
('_p1_sms4_3mm', '_acq-p1-sms4-3mm'),
236-
],
237-
'd160113cf5ea8c5d0cbbbe14ef625e76':
238-
[
239-
('_run0', '_run-0'),
240-
],
241-
'1bd62e10672fe0b435a9aa8d75b45425':
242-
[
243-
# need to add incrementing session -- study should have 2
244-
# and no need for run+ for the scout!
245-
('scout(_run\+)?$', 'scout_ses+'),
246-
],
247-
'da218a66de902adb3ad9407d514e3639':
248-
[
249-
# those sequences renamed later to include DTI- in their acq-
250-
# so for consistency
251-
('hardi_64', 'dwi_acq-DTI-hardi64'),
252-
('acq-hardi', 'acq-DTI-hardi'),
253-
],
254-
'ed20c1ad4a0861b2b65768e159258eec':
255-
[
256-
('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
257-
('_test', ''),
258-
],
259-
'1996f745c30c1df1d3851844e56d294f':
260-
[
261-
('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
262-
],
263-
# '022969bfde39c2940c114edf1db3fabc':
264-
# [ # should be applied only for ses-03!
265-
# ('_acq-MPRAGE_ses-02', '_acq-MPRAGE_ses-03'),
266-
# ],
267-
# to be used only once for one interrupted accession but we cannot
268-
# fix per accession yet
269-
# '23763823d2b9b4b09dafcadc8e8edf21':
270-
# [
271-
# ('anat-T1w_acq-MPRAGE', 'anat-T1w_acq-MPRAGE_run-06'),
272-
# ('anat_T2w', 'anat_T2w_run-06'),
273-
# ('fmap_acq-3mm', 'fmap_acq-3mm_run-06'),
274-
# ],
162+
# e.g., QA:
163+
# '43b67d9139e8c7274578b7451ab21123':
164+
# [
165+
# ('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
166+
# ('BOLD_', 'func_task-rest'),
167+
# ('_p2_s4', '_acq-p2-s4'),
168+
# ('_p2', '_acq-p2'),
169+
# ],
170+
# '': # for any study example with regexes used
171+
# [
172+
# ('AAHead_Scout_.*', 'anat-scout'),
173+
# ('^dti_.*', 'dwi'),
174+
# ('^.*_distortion_corr.*_([ap]+)_([12])', r'fmap-epi_dir-\1_run-\2'),
175+
# ('^(.+)_ap.*_r(0[0-9])', r'func_task-\1_run-\2'),
176+
# ('^t1w_.*', 'anat-T1w'),
177+
# # problematic case -- multiple identically named pepolar fieldmap runs
178+
# # I guess we will just sacrifice ability to detect canceled runs here.
179+
# # And we cannot just use _run+ since it would increment independently
180+
# # for ap and then for pa. We will rely on having ap preceding pa.
181+
# # Added _acq-mb8 so they match the one in funcs
182+
# ('func_task-discorr_acq-ap', r'fmap-epi_dir-ap_acq-mb8_run+'),
183+
# ('func_task-discorr_acq-pa', r'fmap-epi_dir-pa_acq-mb8_run='),
184+
# ]
275185
}
276-
# there was also screw up in the locator specification
277-
# so we need to fix in both
278-
# protocols2fix['67ae5e641ea9d487b6fdf56fb91aeb93'] = protocols2fix['022969bfde39c2940c114edf1db3fabc']
279186

280187
# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
281188
dicoms2skip = [
282-
'1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
283-
'1.3.12.2.1107.5.2.43.66112.30000016102813152550600000004', # double scout
189+
# e.g.
190+
# '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
284191
]
285192

286193
DEFAULT_FIELDS = {
287194
# Let it just be in each json file extracted
288-
# 'Manufacturer': "Siemens",
289-
# 'ManufacturersModelName': "Prisma",
290195
"Acknowledgements":
291196
"We thank Terry Sacket and the rest of the DBIC (Dartmouth Brain Imaging "
292197
"Center) personnel for assistance in data collection, and "
293-
"Yaroslav Halchenko and Matteo Visconti for preparing BIDS dataset. "
294-
"TODO: more",
198+
"Yaroslav O. Halchenko for preparing BIDS dataset. "
199+
"TODO: adjust to your case.",
295200
}
296201

297202

@@ -311,38 +216,10 @@ def filter_dicom(dcmdata):
311216

312217
def filter_files(fn):
313218
"""Return True if a file should be kept, else False.
314-
We're using it to filter out files that do not start with a number."""
315219
316-
# do not check for these accession numbers because they haven't been
317-
# recopied with the initial number
318-
donotfilter = ['A000012', 'A000013', 'A000020', 'A000041']
319-
320-
split = os.path.split(fn)
321-
split2 = os.path.split(split[0])
322-
sequence_dir = split2[1]
323-
split3 = os.path.split(split2[0])
324-
accession_number = split3[1]
220+
Currently reproin does not do any filtering. Override if you need to add some filtering.
221+
"""
325222
return True
326-
if accession_number == 'A000043':
327-
# crazy one that got copied for some runs but not for others,
328-
# so we are going to discard those that got copied and let heudiconv
329-
# figure out the rest
330-
return False if re.match('^[0-9]+-', sequence_dir) else True
331-
elif accession_number == 'unknown':
332-
# this one had some stuff without study description, filter stuff before
333-
# collecting info, so it doesn't crash completely
334-
return False if re.match('^[34][07-9]-sn', sequence_dir) else True
335-
elif accession_number in donotfilter:
336-
return True
337-
elif accession_number.startswith('phantom-'):
338-
# Accessions on phantoms, e.g. in dartmouth-phantoms/bids_test4-20161014
339-
return True
340-
elif accession_number.startswith('heudiconvdcm'):
341-
# we were given some tarball with dicoms which was extracted so we
342-
# better obey
343-
return True
344-
else:
345-
return True if re.match('^[0-9]+-', sequence_dir) else False
346223

347224

348225
def create_key(subdir, file_suffix, outtype=('nii.gz', 'dicom'),
@@ -381,13 +258,17 @@ def get_study_hash(seqinfo):
381258
def fix_canceled_runs(seqinfo):
382259
"""Function that adds cancelme_ to known bad runs which were forgotten
383260
"""
384-
accession_number = get_unique(seqinfo, 'accession_number')
385-
if accession_number in fix_accession2run:
386-
lgr.info("Considering some runs possibly marked to be "
387-
"canceled for accession %s", accession_number)
388-
badruns = fix_accession2run[accession_number]
389-
badruns_pattern = '|'.join(badruns)
390-
for i, s in enumerate(seqinfo):
261+
if not fix_accession2run:
262+
return seqinfo # nothing to do
263+
for i, s in enumerate(seqinfo):
264+
accession_number = getattr(s, 'accession_number')
265+
if accession_number and accession_number in fix_accession2run:
266+
lgr.info("Considering some runs possibly marked to be "
267+
"canceled for accession %s", accession_number)
268+
# This code is reminiscent of prior logic when operating on
269+
# a single accession, but left as is for now
270+
badruns = fix_accession2run[accession_number]
271+
badruns_pattern = '|'.join(badruns)
391272
if re.match(badruns_pattern, s.series_id):
392273
lgr.info('Fixing bad run {0}'.format(s.series_id))
393274
fixedkwargs = dict()

0 commit comments

Comments
 (0)