Skip to content

Commit fb1d648

Browse files
committed
fix: cleaned up provenance output to be more compact and precise
1 parent c4fc02f commit fb1d648

File tree

3 files changed

+67
-28
lines changed

3 files changed

+67
-28
lines changed

nipype/interfaces/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,8 @@ def run_command(runtime, output=None, timeout=0.01):
11161116
cwd=runtime.cwd,
11171117
env=runtime.environ)
11181118
result = {}
1119+
errfile = os.path.join(runtime.cwd, 'stderr.nipype')
1120+
outfile = os.path.join(runtime.cwd, 'stdout.nipype')
11191121
if output == 'stream':
11201122
streams = [Stream('stdout', proc.stdout), Stream('stderr', proc.stderr)]
11211123

@@ -1152,8 +1154,6 @@ def _process(drain=0):
11521154
result['stderr'] = stderr.split('\n')
11531155
result['merged'] = ''
11541156
if output == 'file':
1155-
errfile = os.path.join(runtime.cwd, 'stderr.nipype')
1156-
outfile = os.path.join(runtime.cwd, 'stdout.nipype')
11571157
stderr = open(errfile, 'wt')
11581158
stdout = open(outfile, 'wt')
11591159
proc = subprocess.Popen(runtime.cmdline,

nipype/utils/filemanip.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import cPickle
88
from glob import glob
99
import gzip
10+
import hashlib
1011
from hashlib import md5
1112
import json
1213
import os
@@ -144,20 +145,20 @@ def check_forhash(filename):
144145
return False, None
145146

146147

147-
def hash_infile(afile, chunk_len=8192):
148-
""" Computes md5 hash of a file"""
149-
md5hex = None
148+
def hash_infile(afile, chunk_len=8192, crypto=hashlib.md5):
149+
""" Computes hash of a file using 'crypto' module"""
150+
hex = None
150151
if os.path.isfile(afile):
151-
md5obj = md5()
152+
crypto_obj = crypto()
152153
fp = file(afile, 'rb')
153154
while True:
154155
data = fp.read(chunk_len)
155156
if not data:
156157
break
157-
md5obj.update(data)
158+
crypto_obj.update(data)
158159
fp.close()
159-
md5hex = md5obj.hexdigest()
160-
return md5hex
160+
hex = crypto_obj.hexdigest()
161+
return hex
161162

162163

163164
def hash_timestamp(afile):

nipype/utils/provenance.py

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,17 @@
1717
from ..external import provcopy as pm
1818

1919
from .. import get_info
20-
from .filemanip import (md5, hash_infile)
20+
from .filemanip import (md5, hashlib, hash_infile)
2121
from .. import logging
2222
iflogger = logging.getLogger('interface')
2323

2424
foaf = pm.Namespace("foaf", "http://xmlns.com/foaf/0.1/")
2525
dcterms = pm.Namespace("dcterms", "http://purl.org/dc/terms/")
2626
nipype_ns = pm.Namespace("nipype", "http://nipy.org/nipype/terms/")
2727
niiri = pm.Namespace("niiri", "http://iri.nidash.org/")
28-
28+
crypto = pm.Namespace("crypto",
29+
("http://id.loc.gov/vocabulary/preservation/"
30+
"cryptographicHashFunctions/"))
2931
get_id = lambda: niiri[uuid1().hex]
3032

3133
def get_attr_id(attr, skip=None):
@@ -107,7 +109,7 @@ def safe_encode(x, as_literal=True):
107109
return value
108110
try:
109111
if isinstance(x, (str, unicode)):
110-
if os.path.exists(x) and not os.path.isdir(x):
112+
if os.path.exists(x):
111113
value = 'file://%s%s' % (getfqdn(), x)
112114
if not as_literal:
113115
return value
@@ -171,11 +173,49 @@ def safe_encode(x, as_literal=True):
171173
return pm.Literal(value, pm.XSD['string'])
172174

173175

176+
def prov_encode(graph, value, create_container=True):
177+
if isinstance(value, list) and create_container:
178+
if len(value) > 1:
179+
try:
180+
entities = []
181+
for item in value:
182+
item_entity = prov_encode(graph, item)
183+
if 'file://' not in item_entity.get_value():
184+
raise ValueError('No file found')
185+
entities.append(item_entity)
186+
id = get_id()
187+
entity = graph.collection(identifier=id)
188+
for item_entity in entities:
189+
graph.hadMember(id, item_entity.get_identifier())
190+
except ValueError:
191+
entity = prov_encode(graph, value, create_container=False)
192+
else:
193+
entity = prov_encode(graph, value[0])
194+
else:
195+
encoded_literal = safe_encode(value)
196+
attr = {pm.PROV['value']: encoded_literal}
197+
if isinstance(value, basestring) and os.path.exists(value):
198+
attr.update({pm.PROV['Location']: encoded_literal})
199+
if not os.path.isdir(value):
200+
sha512 = hash_infile(value, crypto=hashlib.sha512)
201+
attr.update({crypto['sha512']: pm.Literal(sha512,
202+
pm.XSD['string'])})
203+
id = get_attr_id(attr, skip=[pm.PROV['Location'],
204+
pm.PROV['value']])
205+
else:
206+
id = get_attr_id(attr, skip=[pm.PROV['Location']])
207+
else:
208+
id = get_attr_id(attr)
209+
entity = graph.entity(id, attr)
210+
return entity
211+
212+
174213
def write_provenance(results, filename='provenance', format='turtle'):
175214
ps = ProvStore()
176215
ps.add_results(results)
177216
return ps.write_provenance(filename=filename, format=format)
178217

218+
179219
class ProvStore(object):
180220

181221
def __init__(self):
@@ -252,15 +292,14 @@ def add_results(self, results):
252292
input_collection.add_extra_attributes({pm.PROV['type']:
253293
nipype_ns['inputs'],
254294
pm.PROV['label']: "Inputs"})
255-
self.g.used(a0, id)
256295
# write input entities
257296
for idx, (key, val) in enumerate(sorted(inputs.items())):
258-
in_attr = {pm.PROV["label"]: key,
259-
nipype_ns["in_port"]: key,
260-
pm.PROV["value"]: safe_encode(val)}
261-
id = get_attr_id(in_attr)
262-
self.g.entity(id, in_attr)
263-
self.g.hadMember(input_collection, id)
297+
in_entity = prov_encode(self.g, val).get_identifier()
298+
self.g.hadMember(input_collection, in_entity)
299+
used_attr = {pm.PROV["label"]: key,
300+
nipype_ns["in_port"]: key}
301+
self.g.used(activity=a0, entity=in_entity,
302+
other_attributes=used_attr)
264303
# write output entities
265304
if outputs:
266305
id = get_id()
@@ -274,12 +313,12 @@ def add_results(self, results):
274313
self.g.wasGeneratedBy(output_collection, a0)
275314
# write output entities
276315
for idx, (key, val) in enumerate(sorted(outputs.items())):
277-
out_attr = {pm.PROV["label"]: key,
278-
nipype_ns["out_port"]: key,
279-
pm.PROV["value"]: safe_encode(val)}
280-
id = get_attr_id(out_attr)
281-
self.g.entity(id, out_attr)
282-
self.g.hadMember(output_collection, id)
316+
out_entity = prov_encode(self.g, val).get_identifier()
317+
self.g.hadMember(output_collection, out_entity)
318+
gen_attr = {pm.PROV["label"]: key,
319+
nipype_ns["out_port"]: key}
320+
self.g.generation(out_entity, activity=a0,
321+
other_attributes=gen_attr)
283322
# write runtime entities
284323
id = get_id()
285324
runtime_collection = self.g.collection(id)
@@ -312,9 +351,8 @@ def add_results(self, results):
312351
agent_attr.update({nipype_ns[key]: safe_encode(value)})
313352
software_agent = self.g.agent(get_attr_id(agent_attr), agent_attr)
314353
self.g.wasAssociatedWith(a0, user_agent, None, None,
315-
{pm.PROV["Role"]: nipype_ns["LoggedInUser"]})
316-
self.g.wasAssociatedWith(a0, software_agent, None, None,
317-
{pm.PROV["Role"]: nipype_ns["Software"]})
354+
{pm.PROV["hadRole"]: nipype_ns["LoggedInUser"]})
355+
self.g.wasAssociatedWith(a0, software_agent)
318356
return self.g
319357

320358
def write_provenance(self, filename='provenance', format='turtle'):

0 commit comments

Comments
 (0)