Skip to content

Commit bb1d542

Browse files
committed
ENH: allow use with existing datalad datasets which do not have .heudiconv/ as a submodule
1 parent 31e5109 commit bb1d542

File tree

2 files changed

+70
-15
lines changed

2 files changed

+70
-15
lines changed

bin/heudiconv

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ from os.path import join as pjoin
4949

5050
from random import sample
5151

52+
# Minimal versions of external dependencies
53+
MIN_VERSIONS = {
54+
'datalad': '0.7'
55+
}
5256
PY3 = sys.version_info[0] >= 3
5357

5458
import logging
@@ -1548,6 +1552,9 @@ def create_file_if_missing(filename, content):
15481552
"""Create file if missing, so we do not override any possibly introduced changes"""
15491553
if exists(filename):
15501554
return False
1555+
dirname = os.path.dirname(filename)
1556+
if not os.path.exists(dirname):
1557+
os.makedirs(dirname)
15511558
with open(filename, 'w') as f:
15521559
f.write(content)
15531560
return True
@@ -1684,7 +1691,7 @@ def mark_sensitive(ds, path_glob=None):
16841691
if not paths:
16851692
return
16861693
sens_kwargs['path'] = paths
1687-
ds.metadata(**sens_kwargs)
1694+
ds.metadata(recursive=True, **sens_kwargs)
16881695

16891696

16901697
def add_to_datalad(topdir, studydir, msg=None, bids=False):
@@ -1695,7 +1702,8 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
16951702
from datalad.support.annexrepo import AnnexRepo
16961703

16971704
from datalad.support.external_versions import external_versions
1698-
assert external_versions['datalad'] >= '0.5.1', "Need datalad >= 0.5.1"
1705+
# 0.7 added .metadata
1706+
assert external_versions['datalad'] >= MIN_VERSIONS['datalad'], "Need datalad >= 0.7"
16991707

17001708
studyrelpath = os.path.relpath(studydir, topdir)
17011709
assert not studyrelpath.startswith(os.path.pardir) # so we are under
@@ -1740,21 +1748,37 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
17401748
ds = Dataset(studydir)
17411749
# Add doesn't have all the options of save such as msg and supers
17421750
ds.add('.gitattributes', to_git=True, save=False)
1743-
dsh = None
1751+
dsh = dsh_path = None
17441752
if os.path.lexists(os.path.join(ds.path, '.heudiconv')):
1745-
dsh = Dataset(opj(ds.path, '.heudiconv'))
1753+
dsh_path = opj(ds.path, '.heudiconv')
1754+
dsh = Dataset(dsh_path)
17461755
if not dsh.is_installed():
1747-
# we need to create it first
1748-
dsh = ds.create(path='.heudiconv',
1749-
force=True,
1750-
shared_access='all')
1756+
# Previously we did not have it as a submodule, and since no
1757+
# automagic migration is implemented, we just need to check first
1758+
# if any path under .heudiconv is already under git control
1759+
if any(x[0].startswith('.heudiconv/') for x in
1760+
ds.repo.repo.index.entries.keys()):
1761+
lgr.warning(
1762+
'%s has .heudiconv not as a submodule from previous versions '
1763+
'of heudiconv. No automagic migration is yet provided', ds
1764+
)
1765+
else:
1766+
# use/create a submodule dataset for .heudiconv
1767+
dsh = ds.create(path='.heudiconv',
1768+
force=True,
1769+
shared_access='all')
17511770
# Since .heudiconv could contain sensitive information
17521771
# we place all files under annex and then add
17531772
if create_file_if_missing(
1754-
opj(dsh.path, '.gitattributes'),
1773+
opj(dsh_path, '.gitattributes'),
17551774
"""* annex.largefiles=anything
17561775
"""):
1757-
dsh.add('.gitattributes', message="Added gitattributes to place all content under annex")
1776+
# should work properly if .heudiconv is a submodule or not
1777+
ds.add(
1778+
'.heudiconv/.gitattributes',
1779+
to_git=True,
1780+
message="Added gitattributes to place all .heudiconv content under annex"
1781+
)
17581782
ds.add('.', recursive=True, save=False,
17591783
# not in effect! ?
17601784
#annex_add_opts=['--include-dotfiles']
@@ -1768,9 +1792,10 @@ def add_to_datalad(topdir, studydir, msg=None, bids=False):
17681792
mark_sensitive(ds, '*/*/*_scans.tsv') # within sess/subj
17691793
mark_sensitive(ds, '*/anat') # within subj
17701794
mark_sensitive(ds, '*/*/anat') # within ses/subj
1771-
if dsh:
1772-
mark_sensitive(dsh) # entire .heudiconv!
1773-
dsh.save(message=msg)
1795+
if dsh_path:
1796+
mark_sensitive(ds, '.heudiconv') # entire .heudiconv!
1797+
# if dsh and dsh.is_installed():
1798+
# dsh.save(message=msg)
17741799
ds.save(message=msg, recursive=True, super_datasets=True)
17751800

17761801
assert not ds.repo.dirty

tests/test_main.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_add_participant_record(tmpdir):
6767

6868

6969
def test_prepare_for_datalad(tmpdir):
70-
pytest.importorskip("datalad")
70+
pytest.importorskip("datalad", minversion=heudiconv.MIN_VERSIONS['datalad'])
7171
studydir = tmpdir.join("PI").join("study")
7272
studydir_ = str(studydir)
7373
os.makedirs(studydir_)
@@ -80,7 +80,7 @@ def test_prepare_for_datalad(tmpdir):
8080

8181
assert superds.is_installed()
8282
assert not superds.repo.dirty
83-
subdss = superds.get_subdatasets(recursive=True)
83+
subdss = superds.subdatasets(recursive=True, result_xfm='relpaths')
8484
for ds_path in sorted(subdss):
8585
ds = Dataset(opj(superds.path, ds_path))
8686
assert ds.is_installed()
@@ -98,6 +98,35 @@ def test_prepare_for_datalad(tmpdir):
9898
assert not ds.repo.is_under_annex(f)
9999
assert not ds.repo.is_under_annex('.gitattributes')
100100

101+
# Above call to add_to_datalad does not create .heudiconv subds since
102+
# directory does not exist (yet).
103+
# Let's first check that it is safe to call it again
104+
heudiconv.add_to_datalad(str(tmpdir), studydir_)
105+
assert not ds.repo.dirty
106+
107+
old_hexsha = ds.repo.get_hexsha()
108+
# Now let's check that if we had previously converted data so that
109+
# .heudiconv was not a submodule, we still would not fail
110+
dsh_path = os.path.join(ds.path, '.heudiconv')
111+
dummy_path = os.path.join(dsh_path, 'dummy.nii.gz')
112+
113+
heudiconv.create_file_if_missing(dummy_path, '')
114+
ds.add(dummy_path, message="added a dummy file")
115+
# next call must not fail, should just issue a warning
116+
heudiconv.add_to_datalad(str(tmpdir), studydir_)
117+
ds.repo.is_under_annex(dummy_path)
118+
assert not ds.repo.dirty
119+
assert '.heudiconv/dummy.nii.gz' in ds.repo.get_files()
120+
121+
# Let's now roll back and make it a proper submodule
122+
ds.repo._git_custom_command([], ['git', 'reset', '--hard', old_hexsha])
123+
# now we do not add dummy to git
124+
heudiconv.create_file_if_missing(dummy_path, '')
125+
heudiconv.add_to_datalad(str(tmpdir), studydir_)
126+
assert '.heudiconv' in ds.subdatasets(result_xfm='relpaths')
127+
assert not ds.repo.dirty
128+
assert '.heudiconv/dummy.nii.gz' not in ds.repo.get_files()
129+
101130

102131
def test_json_dumps_pretty():
103132
pretty = heudiconv.json_dumps_pretty
@@ -157,6 +186,7 @@ def _check_rows(fn, rows):
157186
heudiconv.add_rows_to_scans_keys_file(fn, extra_rows)
158187
_check_rows(fn, extra_rows)
159188

189+
160190
def test__find_subj_ses():
161191
assert heudiconv._find_subj_ses('950_bids_test4/sub-phantom1sid1/fmap/sub-phantom1sid1_acq-3mm_phasediff.json') == ('phantom1sid1', None)
162192
assert heudiconv._find_subj_ses('sub-s1/ses-s1/fmap/sub-s1_ses-s1_acq-3mm_phasediff.json') == ('s1', 's1')

0 commit comments

Comments
 (0)