
Commit e6fdd93

Merge pull request #97 from dartmouth-pbs/sort-scans
Sort scans, .heudiconv into subdataset in --datalad mode
2 parents: 3e0b8ee + c11a92c

3 files changed (+86, -18 lines)

bin/heudiconv

Lines changed: 43 additions & 16 deletions
@@ -49,6 +49,10 @@ from os.path import join as pjoin
 
 from random import sample
 
+# Minimal versions of external dependencies
+MIN_VERSIONS = {
+    'datalad': '0.7'
+}
 PY3 = sys.version_info[0] >= 3
 
 import logging
@@ -1024,11 +1028,13 @@ def add_rows_to_scans_keys_file(fn, newrows):
 
     header = ['filename', 'acq_time', 'operator', 'randstr']
     # save
+    # prepare all the data rows
+    data_rows = [[k] + v for k, v in fnames2info.items()]
+    # sort by the date/filename
+    data_rows_sorted = sorted(data_rows, key=lambda x: (x[1], x[0]))
     with open(fn, 'a') as csvfile:
         writer = csv.writer(csvfile, delimiter='\t')
-        writer.writerow(header)
-        for key in sorted(fnames2info.keys()):
-            writer.writerow([key] + fnames2info[key])
+        writer.writerows([header] + data_rows_sorted)
 
 
 def _find_subj_ses(f_name):
@@ -1561,6 +1567,9 @@ def create_file_if_missing(filename, content):
     """Create file if missing, so we do not override any possibly introduced changes"""
     if exists(filename):
         return False
+    dirname = os.path.dirname(filename)
+    if not os.path.exists(dirname):
+        os.makedirs(dirname)
     with open(filename, 'w') as f:
         f.write(content)
     return True
@@ -1697,7 +1706,7 @@ def mark_sensitive(ds, path_glob=None):
     if not paths:
         return
     sens_kwargs['path'] = paths
-    ds.metadata(**sens_kwargs)
+    ds.metadata(recursive=True, **sens_kwargs)
 
 
 def add_to_datalad(topdir, studydir, msg=None, bids=False):
@@ -1708,7 +1717,8 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
     from datalad.support.annexrepo import AnnexRepo
 
     from datalad.support.external_versions import external_versions
-    assert external_versions['datalad'] >= '0.5.1', "Need datalad >= 0.5.1"
+    # 0.7 added .metadata
+    assert external_versions['datalad'] >= MIN_VERSIONS['datalad'], "Need datalad >= 0.7"
 
     studyrelpath = os.path.relpath(studydir, topdir)
     assert not studyrelpath.startswith(os.path.pardir)  # so we are under
@@ -1753,21 +1763,37 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
     ds = Dataset(studydir)
     # Add doesn't have all the options of save such as msg and supers
     ds.add('.gitattributes', to_git=True, save=False)
-    dsh = None
+    dsh = dsh_path = None
     if os.path.lexists(os.path.join(ds.path, '.heudiconv')):
-        dsh = Dataset(opj(ds.path, '.heudiconv'))
+        dsh_path = opj(ds.path, '.heudiconv')
+        dsh = Dataset(dsh_path)
         if not dsh.is_installed():
-            # we need to create it first
-            dsh = ds.create(path='.heudiconv',
-                            force=True,
-                            shared_access='all')
+            # Previously we did not have it as a submodule, and since no
+            # automagic migration is implemented, we just need to check first
+            # if any path under .heudiconv is already under git control
+            if any(x[0].startswith('.heudiconv/') for x in
+                   ds.repo.repo.index.entries.keys()):
+                lgr.warning(
+                    '%s has .heudiconv not as a submodule from previous versions '
+                    'of heudiconv. No automagic migration is yet provided', ds
+                )
+            else:
+                # use/create a submodule dataset for .heudiconv
+                dsh = ds.create(path='.heudiconv',
+                                force=True,
+                                shared_access='all')
         # Since .heudiconv could contain sensitive information
         # we place all files under annex and then add
         if create_file_if_missing(
-                opj(dsh.path, '.gitattributes'),
+                opj(dsh_path, '.gitattributes'),
                 """* annex.largefiles=anything
                 """):
-            dsh.add('.gitattributes', message="Added gitattributes to place all content under annex")
+            # should work properly if .heudiconv is a submodule or not
+            ds.add(
+                '.heudiconv/.gitattributes',
+                to_git=True,
+                message="Added gitattributes to place all .heudiconv content under annex"
+            )
     ds.add('.', recursive=True, save=False,
            # not in effect! ?
            #annex_add_opts=['--include-dotfiles']
@@ -1781,9 +1807,10 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
     mark_sensitive(ds, '*/*/*_scans.tsv')  # within sess/subj
     mark_sensitive(ds, '*/anat')  # within subj
     mark_sensitive(ds, '*/*/anat')  # within ses/subj
-    if dsh:
-        mark_sensitive(dsh)  # entire .heudiconv!
-        dsh.save(message=msg)
+    if dsh_path:
+        mark_sensitive(ds, '.heudiconv')  # entire .heudiconv!
+        # if dsh and dsh.is_installed():
+        # dsh.save(message=msg)
     ds.save(message=msg, recursive=True, super_datasets=True)
 
     assert not ds.repo.dirty
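
As an aside, here is a minimal standalone sketch of the (acq_time, filename) ordering that add_rows_to_scans_keys_file now applies before writing the _scans.tsv rows. The filenames, times, and values below are made up for illustration and are not taken from an actual conversion:

# Hypothetical scans-file entries keyed by filename -> [acq_time, operator, randstr]
fnames2info = {
    'func/sub-01_task-rest_run-02_bold.nii.gz': ['2017-07-01T10:05:00', 'JD', 'ijkl'],
    'anat/sub-01_T1w.nii.gz': ['2017-07-01T09:30:00', 'JD', 'efgh'],
    'func/sub-01_task-rest_run-01_bold.nii.gz': ['2017-07-01T10:05:00', 'JD', 'abcd'],
}

header = ['filename', 'acq_time', 'operator', 'randstr']
data_rows = [[k] + v for k, v in fnames2info.items()]
# sort by acquisition time first, filename second (ties on time fall back to the name)
data_rows_sorted = sorted(data_rows, key=lambda x: (x[1], x[0]))

for row in [header] + data_rows_sorted:
    print('\t'.join(row))
# The anat scan (earliest acq_time) comes first; the two func runs share an
# acq_time, so they are ordered by filename (run-01 before run-02).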

heuristics/dbic_bids_validator.cfg

Lines changed: 8 additions & 0 deletions
@@ -6,8 +6,16 @@
   "error": [],
   "ignoredFiles": [
     "/.heudiconv/*", "/.heudiconv/*/*", "/.heudiconv/*/*/*", "/.heudiconv/*/*/*/*",
+    "/.heudiconv/.git*",
+    "/.heudiconv/.git/*",
+    "/.heudiconv/.git/*/*",
+    "/.heudiconv/.git/*/*/*",
+    "/.heudiconv/.git/*/*/*/*",
+    "/.heudiconv/.git/*/*/*/*/*",
+    "/.heudiconv/.git/*/*/*/*/*/*",
     "/.git*",
     "/.datalad/*", "/.datalad/.*",
+    "/.*/.datalad/*", "/.*/.datalad/.*",
     "/sub*/ses*/*/*__dup*", "/sub*/*/*__dup*"
   ]
 }

tests/test_main.py

Lines changed: 35 additions & 2 deletions
@@ -67,7 +67,7 @@ def test_add_participant_record(tmpdir):
 
 
 def test_prepare_for_datalad(tmpdir):
-    pytest.importorskip("datalad")
+    pytest.importorskip("datalad", minversion=heudiconv.MIN_VERSIONS['datalad'])
     studydir = tmpdir.join("PI").join("study")
     studydir_ = str(studydir)
     os.makedirs(studydir_)
@@ -80,7 +80,7 @@ def test_prepare_for_datalad(tmpdir):
 
     assert superds.is_installed()
     assert not superds.repo.dirty
-    subdss = superds.get_subdatasets(recursive=True)
+    subdss = superds.subdatasets(recursive=True, result_xfm='relpaths')
     for ds_path in sorted(subdss):
         ds = Dataset(opj(superds.path, ds_path))
         assert ds.is_installed()
@@ -98,6 +98,35 @@ def test_prepare_for_datalad(tmpdir):
             assert not ds.repo.is_under_annex(f)
         assert not ds.repo.is_under_annex('.gitattributes')
 
+    # Above call to add_to_datalad does not create .heudiconv subds since
+    # directory does not exist (yet).
+    # Let's first check that it is safe to call it again
+    heudiconv.add_to_datalad(str(tmpdir), studydir_)
+    assert not ds.repo.dirty
+
+    old_hexsha = ds.repo.get_hexsha()
+    # Now let's check that if we had previously converted data so that
+    # .heudiconv was not a submodule, we still would not fail
+    dsh_path = os.path.join(ds.path, '.heudiconv')
+    dummy_path = os.path.join(dsh_path, 'dummy.nii.gz')
+
+    heudiconv.create_file_if_missing(dummy_path, '')
+    ds.add(dummy_path, message="added a dummy file")
+    # next call must not fail, should just issue a warning
+    heudiconv.add_to_datalad(str(tmpdir), studydir_)
+    ds.repo.is_under_annex(dummy_path)
+    assert not ds.repo.dirty
+    assert '.heudiconv/dummy.nii.gz' in ds.repo.get_files()
+
+    # Let's now roll back and make it a proper submodule
+    ds.repo._git_custom_command([], ['git', 'reset', '--hard', old_hexsha])
+    # now we do not add dummy to git
+    heudiconv.create_file_if_missing(dummy_path, '')
+    heudiconv.add_to_datalad(str(tmpdir), studydir_)
+    assert '.heudiconv' in ds.subdatasets(result_xfm='relpaths')
+    assert not ds.repo.dirty
+    assert '.heudiconv/dummy.nii.gz' not in ds.repo.get_files()
+
 
 def test_json_dumps_pretty():
     pretty = heudiconv.json_dumps_pretty
@@ -146,6 +175,9 @@ def _check_rows(fn, rows):
                assert(row_ == ['filename', 'acq_time', 'operator', 'randstr'])
             else:
                assert(rows[row_[0]] == row_[1:])
+        # dates, filename should be sorted (date "first", filename "second")
+        dates = [(r[1], r[0]) for r in rows_loaded[1:]]
+        assert dates == sorted(dates)
 
     _check_rows(fn, rows)
     # add a new one
@@ -157,6 +189,7 @@ def _check_rows(fn, rows):
     heudiconv.add_rows_to_scans_keys_file(fn, extra_rows)
     _check_rows(fn, extra_rows)
 
+
 def test__find_subj_ses():
     assert heudiconv._find_subj_ses('950_bids_test4/sub-phantom1sid1/fmap/sub-phantom1sid1_acq-3mm_phasediff.json') == ('phantom1sid1', None)
     assert heudiconv._find_subj_ses('sub-s1/ses-s1/fmap/sub-s1_ses-s1_acq-3mm_phasediff.json') == ('s1', 's1')
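
For context on the test's use of create_file_if_missing to seed .heudiconv/dummy.nii.gz, here is a minimal standalone sketch of the helper's behavior after this change. The function body is copied from the diff above; the temporary directory and file names are arbitrary:

import os
import tempfile
from os.path import exists

def create_file_if_missing(filename, content):
    """Create file if missing, so we do not override any possibly introduced changes"""
    if exists(filename):
        return False
    # new in this commit: create missing parent directories first
    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    with open(filename, 'w') as f:
        f.write(content)
    return True

topdir = tempfile.mkdtemp()
dummy_path = os.path.join(topdir, '.heudiconv', 'dummy.nii.gz')
assert create_file_if_missing(dummy_path, '')       # creates .heudiconv/ and the empty file
assert not create_file_if_missing(dummy_path, 'x')  # existing file is left untouched
assert open(dummy_path).read() == ''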
