Skip to content

Commit 1961f0f

Browse files
committed
RM: reproin - Remove (ancient) DBIC specific craft
ReproIn was developed "as deployed", and it hardcoded many remappings etc. that are now relevant only to some older studies. In this commit all of those are removed. I left/added a few as examples of how protocols2fix could be specified. Also, Matteo has moved on, so he was removed from the Acknowledgements.
1 parent 8e42f7d commit 1961f0f

File tree

1 file changed

+37
-160
lines changed

1 file changed

+37
-160
lines changed

heudiconv/heuristics/reproin.py

Lines changed: 37 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -148,150 +148,55 @@
148148
# NOTE: even if filename has number that is 0-padded, internally no padding
149149
# is done
150150
fix_accession2run = {
151-
'A000005': ['^1-'],
152-
'A000035': ['^8-', '^9-'],
153-
'A000067': ['^9-'],
154-
'A000072': ['^5-'],
155-
'A000081': ['^5-'],
156-
'A000082': ['^5-'],
157-
'A000088': ['^9-'],
158-
'A000090': ['^5-'],
159-
'A000127': ['^21-'],
160-
'A000130': ['^15-'],
161-
'A000137': ['^9-', '^11-'],
162-
'A000297': ['^12-'],
163-
'A000326': ['^15-'],
164-
'A000376': ['^15-'],
165-
'A000384': ['^8-', '^11-'],
166-
'A000467': ['^15-'],
167-
'A000490': ['^15-'],
168-
'A000511': ['^15-'],
169-
'A000797': ['^[1-7]-'],
151+
# e.g.:
152+
# 'A000035': ['^8-', '^9-'],
170153
}
171154

172-
# dictionary containing fixes, keys are md5sum of study_description from
173-
# dicoms, in the form of PI-Experimenter^protocolname
174-
# values are list of tuples in the form (regex_pattern, substitution)
155+
# A dictionary containing fixes/remapping for sequence names per study.
156+
# Keys are md5sum of study_description from DICOMs, in the form of PI-Experimenter^protocolname
157+
# You can use `heudiconv -f reproin --command ls --files PATH`
158+
# to list the "study hash".
159+
# Values are lists of tuples in the form (regex_pattern, substitution).
160+
# If the key is an empty string (`''`), the fixes apply to any study.
175161
protocols2fix = {
176-
# QA
177-
'43b67d9139e8c7274578b7451ab21123':
178-
[
179-
# ('anat-scout.*', 'anat-scout_ses-{date}'),
180-
# do not change it so we retain _ses-{date}
181-
# ('anat-scout.*', 'anat-scout'),
182-
('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
183-
('BOLD_p2_s4', 'func_task-rest_acq-p2-s4'),
184-
('BOLD_p2_noprescannormalize', 'func-bold_task-rest_acq-p2noprescannormalize'),
185-
('BOLD_p2', 'func-bold_task-rest_acq-p2'),
186-
('BOLD_', 'func_task-rest'),
187-
('DTI_30_p2_s4_3\.5mm', 'dwi_acq-DTI-30-p2-s4-3.5mm'),
188-
('DTI_30_p2_s4', 'dwi_acq-DTI-30-p2-s4'),
189-
('DTI_30_p2', 'dwi_acq-DTI-30-p2'),
190-
('_p2_s4_3\.5mm', '_acq-p2-s4-3.5mm'),
191-
('_p2_s4', '_acq-p2-s4'),
192-
('_p2', '_acq-p2'),
193-
],
194-
'9d148e2a05f782273f6343507733309d':
195-
[('anat_', 'anat-'),
196-
('run-life[0-9]', 'run+_task-life'),
197-
('scout_run\+', 'scout'),
198-
('T2w', 'T2w_run+'),
199-
# substitutions for old protocol names
200-
('AAHead_Scout_32ch-head-coil', 'anat-scout'),
201-
('MPRAGE', 'anat-T1w_acq-MPRAGE_run+'),
202-
('gre_field_mapping_2mm', 'fmap_run+_acq-2mm'),
203-
('gre_field_mapping_3mm', 'fmap_run+_acq-3mm'),
204-
('epi_bold_sms_p2_s4_2mm_life1_748',
205-
'func_run+_task-life_acq-2mm748'),
206-
('epi_bold_sms_p2_s4_2mm_life2_692',
207-
'func_run+_task-life_acq-2mm692'),
208-
('epi_bold_sms_p2_s4_2mm_life3_754',
209-
'func_run+_task-life_acq-2mm754'),
210-
('epi_bold_sms_p2_s4_2mm_life4_824',
211-
'func_run+_task-life_acq-2mm824'),
212-
('epi_bold_p2_3mm_nofs_life1_374',
213-
'func_run+_task-life_acq-3mmnofs374'),
214-
('epi_bold_p2_3mm_nofs_life2_346',
215-
'func_run+_task-life_acq-3mmnofs346'),
216-
('epi_bold_p2_3mm_nofs_life3_377',
217-
'func_run+_task-life_acq-3mmnofs377'),
218-
('epi_bold_p2_3mm_nofs_life4_412',
219-
'func_run+_task-life_acq-3mmnofs412'),
220-
('t2_space_sag_p4_iso', 'anat-T2w_run+'),
221-
('gre_field_mapping_2.4mm', 'fmap_run+_acq-2.4mm'),
222-
('rest_p2_sms4_2.4mm_64sl_1000tr_32te_600dyn',
223-
'func_run+_task-rest_acq-2.4mm64sl1000tr32te600dyn'),
224-
('DTI_30', 'dwi_run+_acq-30'),
225-
('t1_space_sag_p2_iso', 'anat-T1w_acq-060mm_run+')],
226-
'76b36c80231b0afaf509e2d52046e964':
227-
[('fmap_run\+_2mm', 'fmap_run+_acq-2mm')],
228-
'c6d8fbccc72990bee61d28e73b2618a4':
229-
[('run=', 'run+')],
230-
'a751cc977f1e354fcafcb0ea2de123bd':
231-
[
232-
('_unlabeled', '_task-unlabeled'),
233-
('_mSense', '_acq-mSense'),
234-
('_p1_sms4_2.5mm', '_acq-p1-sms4-2.5mm'),
235-
('_p1_sms4_3mm', '_acq-p1-sms4-3mm'),
236-
],
237-
'd160113cf5ea8c5d0cbbbe14ef625e76':
238-
[
239-
('_run0', '_run-0'),
240-
],
241-
'1bd62e10672fe0b435a9aa8d75b45425':
242-
[
243-
# need to add incrementing session -- study should have 2
244-
# and no need for run+ for the scout!
245-
('scout(_run\+)?$', 'scout_ses+'),
246-
],
247-
'da218a66de902adb3ad9407d514e3639':
248-
[
249-
# those sequences renamed later to include DTI- in their acq-
250-
# so fot consistency
251-
('hardi_64', 'dwi_acq-DTI-hardi64'),
252-
('acq-hardi', 'acq-DTI-hardi'),
253-
],
254-
'ed20c1ad4a0861b2b65768e159258eec':
255-
[
256-
('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
257-
('_test', ''),
258-
],
259-
'1996f745c30c1df1d3851844e56d294f':
260-
[
261-
('fmap_acq-discorr-dti-', 'fmap_acq-dwi_dir-'),
262-
],
263-
# '022969bfde39c2940c114edf1db3fabc':
264-
# [ # should be applied only for ses-03!
265-
# ('_acq-MPRAGE_ses-02', '_acq-MPRAGE_ses-03'),
266-
# ],
267-
# to be used only once for one interrupted accession but we cannot
268-
# fix per accession yet
269-
# '23763823d2b9b4b09dafcadc8e8edf21':
270-
# [
271-
# ('anat-T1w_acq-MPRAGE', 'anat-T1w_acq-MPRAGE_run-06'),
272-
# ('anat_T2w', 'anat_T2w_run-06'),
273-
# ('fmap_acq-3mm', 'fmap_acq-3mm_run-06'),
274-
# ],
162+
# e.g., QA:
163+
# '43b67d9139e8c7274578b7451ab21123':
164+
# [
165+
# ('BOLD_p2_s4_3\.5mm', 'func_task-rest_acq-p2-s4-3.5mm'),
166+
# ('BOLD_', 'func_task-rest'),
167+
# ('_p2_s4', '_acq-p2-s4'),
168+
# ('_p2', '_acq-p2'),
169+
# ],
170+
# '': # for any study example with regexes used
171+
# [
172+
# ('AAHead_Scout_.*', 'anat-scout'),
173+
# ('^dti_.*', 'dwi'),
174+
# ('^.*_distortion_corr.*_([ap]+)_([12])', r'fmap-epi_dir-\1_run-\2'),
175+
# ('^(.+)_ap.*_r(0[0-9])', r'func_task-\1_run-\2'),
176+
# ('^t1w_.*', 'anat-T1w'),
177+
# # problematic case -- multiple identically named pepolar fieldmap runs
178+
# # I guess we will just sacrifice ability to detect canceled runs here.
179+
# # And we cannot just use _run+ since it would increment independently
180+
# # for ap and then for pa. We will rely on having ap preceding pa.
181+
# # Added _acq-mb8 so they match the one in funcs
182+
# ('func_task-discorr_acq-ap', r'fmap-epi_dir-ap_acq-mb8_run+'),
183+
# ('func_task-discorr_acq-pa', r'fmap-epi_dir-pa_acq-mb8_run='),
184+
# ]
275185
}
276-
# there was also screw up in the locator specification
277-
# so we need to fix in both
278-
# protocols2fix['67ae5e641ea9d487b6fdf56fb91aeb93'] = protocols2fix['022969bfde39c2940c114edf1db3fabc']
279186

280187
# list containing StudyInstanceUID to skip -- hopefully doesn't happen too often
281188
dicoms2skip = [
282-
'1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
283-
'1.3.12.2.1107.5.2.43.66112.30000016102813152550600000004', # double scout
189+
# e.g.
190+
# '1.3.12.2.1107.5.2.43.66112.30000016110117002435700000001',
284191
]
285192

286193
DEFAULT_FIELDS = {
287194
# Let it just be in each json file extracted
288-
# 'Manufacturer': "Siemens",
289-
# 'ManufacturersModelName': "Prisma",
290195
"Acknowledgements":
291196
"We thank Terry Sacket and the rest of the DBIC (Dartmouth Brain Imaging "
292197
"Center) personnel for assistance in data collection, and "
293-
"Yaroslav Halchenko and Matteo Visconti for preparing BIDS dataset. "
294-
"TODO: more",
198+
"Yaroslav O. Halchenko for preparing BIDS dataset. "
199+
"TODO: adjust to your case.",
295200
}
296201

297202

@@ -311,38 +216,10 @@ def filter_dicom(dcmdata):
311216

312217
def filter_files(fn):
313218
"""Return True if a file should be kept, else False.
314-
We're using it to filter out files that do not start with a number."""
315219
316-
# do not check for these accession numbers because they haven't been
317-
# recopied with the initial number
318-
donotfilter = ['A000012', 'A000013', 'A000020', 'A000041']
319-
320-
split = os.path.split(fn)
321-
split2 = os.path.split(split[0])
322-
sequence_dir = split2[1]
323-
split3 = os.path.split(split2[0])
324-
accession_number = split3[1]
220+
At the moment reproin does not do any filtering. Override if you need to add some.
221+
"""
325222
return True
326-
if accession_number == 'A000043':
327-
# crazy one that got copied for some runs but not for others,
328-
# so we are going to discard those that got copied and let heudiconv
329-
# figure out the rest
330-
return False if re.match('^[0-9]+-', sequence_dir) else True
331-
elif accession_number == 'unknown':
332-
# this one had some stuff without study description, filter stuff before
333-
# collecting info, so it doesn't crash completely
334-
return False if re.match('^[34][07-9]-sn', sequence_dir) else True
335-
elif accession_number in donotfilter:
336-
return True
337-
elif accession_number.startswith('phantom-'):
338-
# Accessions on phantoms, e.g. in dartmouth-phantoms/bids_test4-20161014
339-
return True
340-
elif accession_number.startswith('heudiconvdcm'):
341-
# we were given some tarball with dicoms which was extracted so we
342-
# better obey
343-
return True
344-
else:
345-
return True if re.match('^[0-9]+-', sequence_dir) else False
346223

347224

348225
def create_key(subdir, file_suffix, outtype=('nii.gz', 'dicom'),

0 commit comments

Comments
 (0)