Skip to content

Commit 05ae5b0

Browse files
authored
Merge pull request #218 from dartmouth-pbs/bf-dlmetadata
BF+ENH: set dist-restrictions metadata correctly
2 parents d3b7077 + ddf03b3 commit 05ae5b0

File tree

4 files changed

+110
-7
lines changed

4 files changed

+110
-7
lines changed

heudiconv/external/dlad.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import inspect
12
import os
3+
24
import os.path as op
35
import logging
46
from glob import glob
@@ -61,17 +63,30 @@ def add_to_datalad(topdir, studydir, msg, bids):
6163
assert ds.is_installed()
6264
superds = ds
6365

64-
create_file_if_missing(
65-
op.join(studydir, '.gitattributes'),
66-
"""\
66+
# TODO: we need a helper (in DataLad ideally) to ease adding such
67+
# specifications
68+
gitattributes_path = op.join(studydir, '.gitattributes')
69+
# We will just make sure that all our desired rules are present in it
70+
desired_attrs = """\
6771
* annex.largefiles=(largerthan=100kb)
6872
*.json annex.largefiles=nothing
6973
*.txt annex.largefiles=nothing
7074
*.tsv annex.largefiles=nothing
7175
*.nii.gz annex.largefiles=anything
7276
*.tgz annex.largefiles=anything
7377
*_scans.tsv annex.largefiles=anything
74-
""")
78+
"""
79+
if op.exists(gitattributes_path):
80+
with open(gitattributes_path, 'rb') as f:
81+
known_attrs = [line.decode('utf-8').rstrip() for line in f.readlines()]
82+
else:
83+
known_attrs = []
84+
for attr in desired_attrs.split('\n'):
85+
if attr not in known_attrs:
86+
known_attrs.append(attr)
87+
with open(gitattributes_path, 'wb') as f:
88+
f.write('\n'.join(known_attrs).encode('utf-8'))
89+
7590
# so for mortals it just looks like a regular directory!
7691
if not ds.config.get('annex.thin'):
7792
ds.config.add('annex.thin', 'true', where='local')
@@ -155,7 +170,12 @@ def mark_sensitive(ds, path_glob):
155170
paths = glob(op.join(ds.path, path_glob))
156171
if not paths:
157172
return
158-
ds.repo.set_metadata(
173+
lgr.debug("Marking %d files with distribution-restrictions field",
174+
len(paths))
175+
# set_metadata can be a bloody generator
176+
res = ds.repo.set_metadata(
159177
paths,
160-
init=[('distribution-restrictions', 'sensitive')],
161-
recursive=True)
178+
init=dict([('distribution-restrictions', 'sensitive')]),
179+
recursive=True)
180+
if inspect.isgenerator(res):
181+
res = list(res)

heudiconv/external/tests/__init__.py

Whitespace-only changes.

heudiconv/external/tests/test_dlad.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from ..dlad import mark_sensitive
2+
from datalad.api import Dataset
3+
from ...utils import create_tree
4+
5+
6+
def test_mark_sensitive(tmpdir):
    """Check which files get tagged by ``mark_sensitive(ds, 'f*')``.

    Four files are created; only ``f1`` and ``f2`` match the glob.  ``g2``
    holds the same content as ``f1`` and is expected to be reported with
    the same metadata record, while ``g1`` must have no (or an empty)
    record.
    """
    topdir = str(tmpdir)
    ds = Dataset(topdir).create(force=True)

    # g2 intentionally duplicates the content of f1
    files = {'f1': 'd1', 'f2': 'd2', 'g1': 'd3', 'g2': 'd1'}
    create_tree(topdir, files)
    ds.add('.')

    mark_sensitive(ds, 'f*')

    all_meta = dict(ds.repo.get_metadata('.'))
    target_rec = {'distribution-restrictions': ['sensitive']}

    # g1: nothing or empty record
    g1_meta = all_meta.pop('g1', None)
    assert not g1_meta

    # g2 since the same content
    expected = {'f1': target_rec, 'f2': target_rec, 'g2': target_rec}
    assert all_meta == expected

heudiconv/utils.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,3 +389,63 @@ def clear_temp_dicoms(item_dicoms):
389389
def file_md5sum(filename):
390390
with open(filename, 'rb') as f:
391391
return hashlib.md5(f.read()).hexdigest()
392+
393+
394+
# Borrowed from DataLad (MIT license), with "archives" functionality commented
395+
# out
396+
class File(object):
    """Helper for a file entry in the create_tree/@with_tree

    It allows defining additional settings for entries.
    """

    def __init__(self, name, executable=False):
        """

        Parameters
        ----------
        name : str
          Name of the file
        executable : bool, optional
          Make it executable
        """
        self.name = name
        self.executable = executable

    def __str__(self):
        return self.name

    def __repr__(self):
        return "%s(%r, executable=%r)" % (
            type(self).__name__, self.name, self.executable)


def create_tree(path, tree, archives_leading_dir=True):
    """Given a list of tuples (name, load) or a dict create such a tree

    If load is a tuple, a list or a dict itself, a sub-directory called
    `name` is created and populated recursively.  Any other load is written
    out as the content of the file `name`.

    Parameters
    ----------
    path : str
      Directory under which to create the tree; created if missing
    tree : dict or iterable of (name, load)
      `name` is either a str or a `File` instance (to request the
      executable bit)
    archives_leading_dir : bool, optional
      Unused here besides being passed to recursive calls; kept for
      interface compatibility since archives support is disabled
    """
    lgr.log(5, "Creating a tree under %s", path)
    if not op.exists(path):
        os.makedirs(path)

    if isinstance(tree, dict):
        tree = tree.items()

    for file_, load in tree:
        if isinstance(file_, File):
            executable = file_.executable
            name = file_.name
        else:
            executable = False
            name = file_
        full_name = op.join(path, name)
        if isinstance(load, (tuple, list, dict)):
            # NOTE: archives functionality is disabled in this copy, so
            # names ending with .tar.gz/.tar/.zip become plain directories
            create_tree(
                full_name, load,
                archives_leading_dir=archives_leading_dir)
        else:
            # Write bytes so the on-disk content does not depend on the
            # locale's default encoding or on the Python major version
            if not isinstance(load, bytes):
                load = load.encode('utf-8')
            with open(full_name, 'wb') as f:
                f.write(load)
        if executable:
            os.chmod(full_name, os.stat(full_name).st_mode | stat.S_IEXEC)

0 commit comments

Comments
 (0)