Skip to content

Commit 778723c

Browse files
authored
Merge pull request #73 from dartmouth-pbs/enh-dbic2
BF: many fixes for _scans.tsv files
2 parents 48bd423 + cf17d76 commit 778723c

File tree

2 files changed

+56
-22
lines changed

2 files changed

+56
-22
lines changed

bin/heudiconv

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -914,7 +914,7 @@ def convert(items, symlink=True, converter=None,
914914
# save acquisition time information if it's BIDS
915915
# at this point we still have acquisition date
916916
if is_bids:
917-
save_scans_key(items, outname_bids_files)
917+
save_scans_key(item, outname_bids_files)
918918
# Fix up and unify BIDS files
919919
tuneup_bids_json_files(outname_bids_files)
920920
# we should provide specific handling for fmap,
@@ -967,13 +967,16 @@ def get_formatted_scans_key_row(item):
967967
mw = ds.wrapper_from_data(dcm.read_file(dcm_fn, stop_before_pixels=True))
968968
# we need to store filenames and acquisition times
969969
# parse date and time and get it into isoformat
970-
date = mw.dcm_data.AcquisitionDate
971-
time = mw.dcm_data.AcquisitionTime.split('.')[0]
970+
date = mw.dcm_data.ContentDate
971+
time = mw.dcm_data.ContentTime.split('.')[0]
972972
td = time + date
973973
acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
974974
# add random string
975975
randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
976976
row = [acq_time, mw.dcm_data.PerformingPhysicianName, randstr]
977+
# empty entries should be 'n/a'
978+
# https://github.com/dartmouth-pbs/heudiconv/issues/32
979+
row = ['n/a' if not str(e) else e for e in row]
977980
return row
978981

979982

@@ -1012,35 +1015,59 @@ def add_rows_to_scans_keys_file(fn, newrows):
10121015
writer.writerow([key] + fnames2info[key])
10131016

10141017

1015-
def save_scans_key(items, outname_bids_files):
1018+
def _find_subj_ses(f_name):
1019+
"""Given a path to the bids formatted filename parse out subject/session"""
1020+
# we will allow the match at either directories or within filename
1021+
# assuming that bids layout is "correct"
1022+
regex = re.compile('sub-(?P<subj>[a-zA-Z0-9]*)([/_]ses-(?P<ses>[a-zA-Z0-9]*))?')
1023+
res = regex.search(f_name).groupdict()
1024+
return res.get('subj'), res.get('ses', None)
1025+
1026+
1027+
def save_scans_key(item, bids_files):
    """Add rows for the files converted from one item to a ``_scans.tsv``.

    Parameters
    ----------
    item:
      The single conversion item; handed unchanged to
      ``get_formatted_scans_key_row`` to build the row stored for every file.
    bids_files: str or list
      Path(s) to the BIDS files (presumably the ``.json`` sidecars) produced
      for ``item``.  A single path is treated as a one-element list.

    Returns
    -------
    None

    Raises
    ------
    ValueError
      If no files were given, or if the files imply different subjects or
      different sessions.
    """
    if isinstance(bids_files, str):
        # a bare path would otherwise be iterated character by character
        bids_files = [bids_files]
    if not bids_files:
        raise ValueError("we do expect some files since it was called")

    rows = dict()
    # we will need to deduce subject and session from the bids_filename
    # and if there is a conflict, we would just blow since this function
    # should be invoked only on a result of a single item conversion as far
    # as I see it, so should have the same subject/session
    subj, ses = None, None
    for bids_file in bids_files:
        # get filenames: keep only "<datatype dir>/<file name>"
        f_name = '/'.join(bids_file.split('/')[-2:])
        # point at the image instead of the sidecar; swap only the trailing
        # extension so a 'json' elsewhere in the name is left alone
        if f_name.endswith('.json'):
            f_name = f_name[:-len('json')] + 'nii.gz'
        rows[f_name] = get_formatted_scans_key_row(item)
        subj_, ses_ = _find_subj_ses(f_name)
        if subj and subj_ != subj:
            raise ValueError(
                "We found before subject %s but now deduced %s from %s"
                % (subj, subj_, f_name)
            )
        subj = subj_
        if ses and ses_ != ses:
            raise ValueError(
                "We found before session %s but now deduced %s from %s"
                % (ses, ses_, f_name)
            )
        ses = ses_
    # where should we store it? two levels above the (last) converted file
    output_dir = dirname(dirname(bids_file))
    # save
    ses = '_ses-%s' % ses if ses else ''
    add_rows_to_scans_keys_file(
        pjoin(output_dir, 'sub-{0}{1}_scans.tsv'.format(subj, ses)),
        rows
    )
10441071

10451072

10461073
def tuneup_bids_json_files(json_files):
@@ -1052,8 +1079,9 @@ def tuneup_bids_json_files(json_files):
10521079
for jsonfile in json_files:
10531080
json_ = json.load(open(jsonfile))
10541081
# sanitize!
1055-
for f in ['AcquisitionDateTime', 'AcquisitionDate']:
1056-
json_.pop(f, None)
1082+
for f1 in ['Acquisition', 'Study', 'Series']:
1083+
for f2 in ['DateTime', 'Date']:
1084+
json_.pop(f1 + f2, None)
10571085
# TODO: should actually be placed into series file which must
10581086
# go under annex (not under git) and marked as sensitive
10591087
if 'Date' in str(json_):
@@ -1725,8 +1753,9 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
17251753
mark_sensitive(ds, 'sourcedata')
17261754
mark_sensitive(ds, '*_scans.tsv') # top level
17271755
mark_sensitive(ds, '*/*_scans.tsv') # within subj
1756+
mark_sensitive(ds, '*/*/*_scans.tsv') # within sess/subj
17281757
mark_sensitive(ds, '*/anat') # within subj
1729-
mark_sensitive(ds, '*/*/anat') # within subj/ses
1758+
mark_sensitive(ds, '*/*/anat') # within ses/subj
17301759
if dsh:
17311760
mark_sensitive(dsh) # entire .heudiconv!
17321761
dsh.save(message=msg)

tests/test_main.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,9 @@ def test_get_formatted_scans_key_row():
117117
outname_bids_file = '/a/path/Halchenko/Yarik/950_bids_test4/sub-phantom1sid1/fmap/sub-phantom1sid1_acq-3mm_phasediff.json'
118118

119119
row = heudiconv.get_formatted_scans_key_row(item)
120-
assert(len(row) == 3)
121-
assert(row[0] == '2016-10-14T09:26:34')
122-
assert(row[1] == '')
120+
assert len(row) == 3
121+
assert row[0] == '2016-10-14T09:26:36'
122+
assert row[1] == 'n/a'
123123
randstr1 = row[2]
124124
row = heudiconv.get_formatted_scans_key_row(item)
125125
randstr2 = row[2]
@@ -157,3 +157,8 @@ def _check_rows(fn, rows):
157157
heudiconv.add_rows_to_scans_keys_file(fn, extra_rows)
158158
_check_rows(fn, extra_rows)
159159

160+
def test__find_subj_ses():
    """Check subject/session parsing from BIDS-style paths and filenames."""
    # subject only, found in a directory component
    assert heudiconv._find_subj_ses('950_bids_test4/sub-phantom1sid1/fmap/sub-phantom1sid1_acq-3mm_phasediff.json') == ('phantom1sid1', None)
    # subject and session as directories ("sub-X/ses-Y")
    assert heudiconv._find_subj_ses('sub-s1/ses-s1/fmap/sub-s1_ses-s1_acq-3mm_phasediff.json') == ('s1', 's1')
    # subject and session only within the filename ("sub-X_ses-Y")
    assert heudiconv._find_subj_ses('sub-s1_ses-s2_acq-3mm_phasediff.json') == ('s1', 's2')
    # label ends at the first non-alphanumeric character; no session present
    assert heudiconv._find_subj_ses('fmap/sub-01-fmap_acq-3mm_acq-3mm_phasediff.nii.gz') == ('01', None)

0 commit comments

Comments
 (0)