Skip to content

Commit 0ee1ba4

Browse files
authored
Merge pull request #265 from dartmouth-pbs/bf-scans-randombit
Make randstr field in _scans files deterministic
2 parents c520f05 + b29dd06 commit 0ee1ba4

File tree

2 files changed

+40 additions, -29 deletions

heudiconv/bids.py

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Handle BIDS specific operations"""
22

3+
import hashlib
34
import os
45
import os.path as op
56
import logging
@@ -196,7 +197,7 @@ def save_scans_key(item, bids_files):
196197
"""
197198
Parameters
198199
----------
199-
items:
200+
item:
200201
bids_files: str or list
201202
202203
Returns
@@ -214,7 +215,7 @@ def save_scans_key(item, bids_files):
214215
# get filenames
215216
f_name = '/'.join(bids_file.split('/')[-2:])
216217
f_name = f_name.replace('json', 'nii.gz')
217-
rows[f_name] = get_formatted_scans_key_row(item)
218+
rows[f_name] = get_formatted_scans_key_row(item[-1][0])
218219
subj_, ses_ = find_subj_ses(f_name)
219220
if not subj_:
220221
lgr.warning(
@@ -279,7 +280,7 @@ def add_rows_to_scans_keys_file(fn, newrows):
279280
writer.writerows([header] + data_rows_sorted)
280281

281282

282-
def get_formatted_scans_key_row(item):
283+
def get_formatted_scans_key_row(dcm_fn):
283284
"""
284285
Parameters
285286
----------
@@ -291,25 +292,27 @@ def get_formatted_scans_key_row(item):
291292
[ISO acquisition time, performing physician name, random string]
292293
293294
"""
294-
dcm_fn = item[-1][0]
295-
from heudiconv.external.dcmstack import ds
296-
mw = ds.wrapper_from_data(dcm.read_file(dcm_fn,
297-
stop_before_pixels=True,
298-
force=True))
295+
dcm_data = dcm.read_file(dcm_fn, stop_before_pixels=True, force=True)
299296
# we need to store filenames and acquisition times
300297
# parse date and time and get it into isoformat
301298
try:
302-
date = mw.dcm_data.ContentDate
303-
time = mw.dcm_data.ContentTime.split('.')[0]
299+
date = dcm_data.ContentDate
300+
time = dcm_data.ContentTime.split('.')[0]
304301
td = time + date
305302
acq_time = datetime.strptime(td, '%H%M%S%Y%m%d').isoformat()
306303
except AttributeError as exc:
307304
lgr.warning("Failed to get date/time for the content: %s", str(exc))
308305
acq_time = None
309306
# add random string
310-
randstr = ''.join(map(chr, sample(k=8, population=range(33, 127))))
307+
# But let's make it reproducible by using all UIDs
308+
# (might change across versions?)
309+
randcontent = u''.join(
310+
[getattr(dcm_data, f) or '' for f in sorted(dir(dcm_data))
311+
if f.endswith('UID')]
312+
)
313+
randstr = hashlib.md5(randcontent.encode()).hexdigest()[:8]
311314
try:
312-
perfphys = mw.dcm_data.PerformingPhysicianName
315+
perfphys = dcm_data.PerformingPhysicianName
313316
except AttributeError:
314317
perfphys = ''
315318
row = [acq_time, perfphys, randstr]

tests/test_main.py

Lines changed: 25 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -153,23 +153,31 @@ def test_prepare_for_datalad(tmpdir):
153153

154154

155155
def test_get_formatted_scans_key_row():
156-
item = [
157-
('%s/01-fmap_acq-3mm/1.3.12.2.1107.5.2.43.66112.2016101409263663466202201.dcm'
158-
% TESTS_DATA_PATH,
159-
('nii.gz', 'dicom'),
160-
['%s/01-fmap_acq-3mm/1.3.12.2.1107.5.2.43.66112.2016101409263663466202201.dcm'
161-
% TESTS_DATA_PATH])
162-
]
163-
outname_bids_file = '/a/path/Halchenko/Yarik/950_bids_test4/sub-phantom1sid1/fmap/sub-phantom1sid1_acq-3mm_phasediff.json'
164-
165-
row = get_formatted_scans_key_row(item)
166-
assert len(row) == 3
167-
assert row[0] == '2016-10-14T09:26:36'
168-
assert row[1] == 'n/a'
169-
randstr1 = row[2]
170-
row = get_formatted_scans_key_row(item)
171-
randstr2 = row[2]
172-
assert(randstr1 != randstr2)
156+
dcm_fn = \
157+
'%s/01-fmap_acq-3mm/1.3.12.2.1107.5.2.43.66112.2016101409263663466202201.dcm' \
158+
% TESTS_DATA_PATH
159+
160+
row1 = get_formatted_scans_key_row(dcm_fn)
161+
assert len(row1) == 3
162+
assert row1[0] == '2016-10-14T09:26:36'
163+
assert row1[1] == 'n/a'
164+
prandstr1 = row1[2]
165+
166+
# if we rerun - should be identical!
167+
row2 = get_formatted_scans_key_row(dcm_fn)
168+
prandstr2 = row2[2]
169+
assert(prandstr1 == prandstr2)
170+
assert(row1 == row2)
171+
# So it is consistent across pythons etc, we use explicit value here
172+
assert(prandstr1 == "437fe57c")
173+
174+
# but the prandstr should change when we consider another DICOM file
175+
row3 = get_formatted_scans_key_row(
176+
"%s/01-anat-scout/0001.dcm" % TESTS_DATA_PATH)
177+
assert(row3 != row1)
178+
prandstr3 = row3[2]
179+
assert(prandstr1 != prandstr3)
180+
assert(prandstr3 == "fae3befb")
173181

174182

175183
# TODO: finish this

0 commit comments

Comments
 (0)