Skip to content

Commit c4956b3

Browse files
authored
Merge pull request #511 from dbic/bf-anon-cmd
BF (TST): make anonymize_script actually output anything and map deterministically
2 parents 460a818 + 1d6b41b commit c4956b3

File tree

3 files changed

+44
-12
lines changed

3 files changed

+44
-12
lines changed

heudiconv/tests/anonymize_script.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22

33
import sys
44
import re
5-
import ctypes
5+
import hashlib
66

77

88
def bids_id_(sid):
99
parsed_id = re.compile(r"^(?:sub-|)(.+)$").search(sid).group(1)
10-
return str(ctypes.c_size_t(hash(parsed_id)).value)
10+
return hashlib.md5(parsed_id.encode()).hexdigest()[:8]
1111

1212

1313
def main():
@@ -16,4 +16,4 @@ def main():
1616

1717

1818
if __name__ == '__main__':
19-
main()
19+
print(main())

heudiconv/tests/test_regression.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from glob import glob
33
import os
44
import os.path as op
5+
import re
56

67
import pytest
78

@@ -19,30 +20,48 @@
1920

2021

2122
@pytest.mark.skipif(not have_datalad, reason="no datalad")
22-
@pytest.mark.parametrize('subject', ['sub-sid000143'])
23+
@pytest.mark.parametrize('subject', ['sid000143'])
2324
@pytest.mark.parametrize('heuristic', ['reproin.py'])
2425
@pytest.mark.parametrize('anon_cmd', [None, 'anonymize_script.py'])
2526
def test_conversion(tmpdir, subject, heuristic, anon_cmd):
2627
tmpdir.chdir()
2728
try:
2829
datadir = fetch_data(tmpdir.strpath,
2930
"dbic/QA", # path from datalad database root
30-
getpath=op.join('sourcedata', subject))
31+
getpath=op.join('sourcedata', f'sub-{subject}'))
3132
except IncompleteResultsError as exc:
3233
pytest.skip("Failed to fetch test data: %s" % str(exc))
3334
outdir = tmpdir.mkdir('out').strpath
3435

3536
args = gen_heudiconv_args(
3637
datadir, outdir, subject, heuristic, anon_cmd,
37-
template=op.join('sourcedata/{subject}/*/*/*.tgz')
38+
template='sourcedata/sub-{subject}/*/*/*.tgz'
3839
)
3940
runner(args) # run conversion
4041

42+
# Get the possibly anonymized subject id and verify that it was
43+
# anonymized or not:
44+
subject_maybe_anon = glob(f'{outdir}/sub-*')
45+
assert len(subject_maybe_anon) == 1 # just one should be there
46+
subject_maybe_anon = op.basename(subject_maybe_anon[0])[4:]
47+
48+
if anon_cmd:
49+
assert subject_maybe_anon != subject
50+
else:
51+
assert subject_maybe_anon == subject
52+
4153
# verify functionals were converted
42-
assert (
43-
glob('{}/{}/func/*'.format(outdir, subject)) ==
44-
glob('{}/{}/func/*'.format(datadir, subject))
45-
)
54+
outfiles = sorted([f[len(outdir):] for f in glob(f'{outdir}/sub-{subject_maybe_anon}/func/*')])
55+
assert outfiles
56+
datafiles = sorted([f[len(datadir):] for f in glob(f'{datadir}/sub-{subject}/ses-*/func/*')])
57+
# original data has ses- but because we are converting only func, and not
58+
# providing any session, we will not "match". Let's strip away the session
59+
datafiles = [re.sub(r'[/\\_]ses-[^/\\_]*', '', f) for f in datafiles]
60+
if not anon_cmd:
61+
assert outfiles == datafiles
62+
else:
63+
assert outfiles != datafiles # sid was anonymized
64+
assert len(outfiles) == len(datafiles) # but we have the same number of files
4665

4766
# compare some json metadata
4867
json_ = '{}/task-rest_acq-24mm64sl1000tr32te600dyn_bold.json'.format

heudiconv/utils.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,13 @@ def dec(obj):
111111

112112

113113
def anonymize_sid(sid, anon_sid_cmd):
114-
114+
"""
115+
Raises
116+
------
117+
ValueError
118+
if script returned an empty string (after whitespace stripping),
119+
or output with multiple words/lines.
120+
"""
115121
cmd = [anon_sid_cmd, sid]
116122
shell_return = check_output(cmd)
117123

@@ -120,7 +126,14 @@ def anonymize_sid(sid, anon_sid_cmd):
120126
else:
121127
anon_sid = shell_return
122128

123-
return anon_sid.strip()
129+
anon_sid = anon_sid.strip()
130+
if not anon_sid:
131+
raise ValueError(f"{anon_sid_cmd!r} {sid!r} returned empty sid")
132+
# rudimentary check for sanity: no multiple lines or words (in general
133+
# ok, but not ok for BIDS) in the output
134+
if len(anon_sid.split()) > 1:
135+
raise ValueError(f"{anon_sid_cmd!r} {sid!r} returned multiline output")
136+
return anon_sid
124137

125138

126139
def create_file_if_missing(filename, content):

0 commit comments

Comments
 (0)