17
17
from ..external import provcopy as pm
18
18
19
19
from .. import get_info
20
- from .filemanip import (md5 , hash_infile )
20
+ from .filemanip import (md5 , hashlib , hash_infile )
21
21
from .. import logging
22
22
iflogger = logging .getLogger ('interface' )
23
23
24
24
# Module-level namespaces built with pm.Namespace(prefix, base IRI).
# `niiri` is the namespace get_id() indexes to build identifiers; `crypto`
# is indexed with 'sha512' by prov_encode() to key a file's hash attribute.
foaf = pm .Namespace ("foaf" , "http://xmlns.com/foaf/0.1/" )
25
25
dcterms = pm .Namespace ("dcterms" , "http://purl.org/dc/terms/" )
26
26
nipype_ns = pm .Namespace ("nipype" , "http://nipy.org/nipype/terms/" )
27
27
niiri = pm .Namespace ("niiri" , "http://iri.nidash.org/" )
28
-
28
+ crypto = pm .Namespace ("crypto" ,
29
+ ("http://id.loc.gov/vocabulary/preservation/"
30
+ "cryptographicHashFunctions/" ))
29
31
def get_id():
    """Return a new identifier from the ``niiri`` namespace.

    The identifier is obtained by indexing ``niiri`` with the hex string of
    a freshly generated ``uuid1``.
    """
    return niiri[uuid1().hex]
30
32
31
33
def get_attr_id (attr , skip = None ):
@@ -107,7 +109,7 @@ def safe_encode(x, as_literal=True):
107
109
return value
108
110
try :
109
111
if isinstance (x , (str , unicode )):
110
- if os .path .exists (x ) and not os . path . isdir ( x ) :
112
+ if os .path .exists (x ):
111
113
value = 'file://%s%s' % (getfqdn (), x )
112
114
if not as_literal :
113
115
return value
@@ -171,11 +173,49 @@ def safe_encode(x, as_literal=True):
171
173
return pm .Literal (value , pm .XSD ['string' ])
172
174
173
175
176
def prov_encode(graph, value, create_container=True):
    """Encode ``value`` as an entity in ``graph`` and return that entity.

    Parameters
    ----------
    graph
        Provenance container; its ``collection``, ``hadMember`` and
        ``entity`` methods are used to record the encoded value.
    value
        The value to encode.
    create_container : bool
        When true, a list with more than one item is encoded as a
        collection whose members are the encoded items, as long as every
        item encodes to a ``file://`` value.

    Returns
    -------
    The entity created for ``value``.

    Notes
    -----
    * A one-element list is encoded as its single element.
    * An empty list is encoded as a plain literal entity (previously this
      raised ``IndexError`` on ``value[0]``).
    * A list containing any non-file item is encoded as one literal entity.
    * A string naming an existing path also gets a ``prov:Location``
      attribute and, if it is not a directory, a ``crypto:sha512``
      attribute holding the hash of the file.
    """
    # An empty list has no items to wrap, so it falls through to the
    # literal branch below instead of being indexed.
    if isinstance(value, list) and create_container and value:
        if len(value) == 1:
            return prov_encode(graph, value[0])
        try:
            entities = []
            for item in value:
                item_entity = prov_encode(graph, item)
                if 'file://' not in item_entity.get_value():
                    raise ValueError('No file found')
                entities.append(item_entity)
            collection_id = get_id()
            entity = graph.collection(identifier=collection_id)
            for item_entity in entities:
                graph.hadMember(collection_id, item_entity.get_identifier())
        except ValueError:
            # At least one item is not a file: encode the list as a whole.
            entity = prov_encode(graph, value, create_container=False)
        return entity

    encoded_literal = safe_encode(value)
    attr = {pm.PROV['value']: encoded_literal}
    if isinstance(value, basestring) and os.path.exists(value):
        attr[pm.PROV['Location']] = encoded_literal
        if os.path.isdir(value):
            entity_id = get_attr_id(attr, skip=[pm.PROV['Location']])
        else:
            sha512 = hash_infile(value, crypto=hashlib.sha512)
            attr[crypto['sha512']] = pm.Literal(sha512, pm.XSD['string'])
            # Location and value are skipped when deriving a file's id, so
            # the id is computed from the remaining attribute (the hash).
            entity_id = get_attr_id(attr, skip=[pm.PROV['Location'],
                                                pm.PROV['value']])
    else:
        entity_id = get_attr_id(attr)
    return graph.entity(entity_id, attr)
211
+
212
+
174
213
def write_provenance(results, filename='provenance', format='turtle'):
    """Record ``results`` in a new ``ProvStore`` and write it out.

    ``filename`` and ``format`` are forwarded unchanged to
    ``ProvStore.write_provenance``, whose return value is returned.
    """
    store = ProvStore()
    store.add_results(results)
    written = store.write_provenance(filename=filename, format=format)
    return written
178
217
218
+
179
219
class ProvStore (object ):
180
220
181
221
def __init__ (self ):
@@ -252,15 +292,14 @@ def add_results(self, results):
252
292
input_collection .add_extra_attributes ({pm .PROV ['type' ]:
253
293
nipype_ns ['inputs' ],
254
294
pm .PROV ['label' ]: "Inputs" })
255
- self .g .used (a0 , id )
256
295
# write input entities
257
296
for idx , (key , val ) in enumerate (sorted (inputs .items ())):
258
- in_attr = { pm . PROV [ "label" ]: key ,
259
- nipype_ns [ "in_port" ]: key ,
260
- pm .PROV ["value " ]: safe_encode ( val )}
261
- id = get_attr_id ( in_attr )
262
- self .g .entity ( id , in_attr )
263
- self . g . hadMember ( input_collection , id )
297
+ in_entity = prov_encode ( self . g , val ). get_identifier ()
298
+ self . g . hadMember ( input_collection , in_entity )
299
+ used_attr = { pm .PROV ["label " ]: key ,
300
+ nipype_ns [ "in_port" ]: key }
301
+ self .g .used ( activity = a0 , entity = in_entity ,
302
+ other_attributes = used_attr )
264
303
# write output entities
265
304
if outputs :
266
305
id = get_id ()
@@ -274,12 +313,12 @@ def add_results(self, results):
274
313
self .g .wasGeneratedBy (output_collection , a0 )
275
314
# write output entities
276
315
for idx , (key , val ) in enumerate (sorted (outputs .items ())):
277
- out_attr = { pm . PROV [ "label" ]: key ,
278
- nipype_ns [ "out_port" ]: key ,
279
- pm .PROV ["value " ]: safe_encode ( val )}
280
- id = get_attr_id ( out_attr )
281
- self .g .entity ( id , out_attr )
282
- self . g . hadMember ( output_collection , id )
316
+ out_entity = prov_encode ( self . g , val ). get_identifier ()
317
+ self . g . hadMember ( output_collection , out_entity )
318
+ gen_attr = { pm .PROV ["label " ]: key ,
319
+ nipype_ns [ "out_port" ]: key }
320
+ self .g .generation ( out_entity , activity = a0 ,
321
+ other_attributes = gen_attr )
283
322
# write runtime entities
284
323
id = get_id ()
285
324
runtime_collection = self .g .collection (id )
@@ -312,9 +351,8 @@ def add_results(self, results):
312
351
agent_attr .update ({nipype_ns [key ]: safe_encode (value )})
313
352
software_agent = self .g .agent (get_attr_id (agent_attr ), agent_attr )
314
353
self .g .wasAssociatedWith (a0 , user_agent , None , None ,
315
- {pm .PROV ["Role" ]: nipype_ns ["LoggedInUser" ]})
316
- self .g .wasAssociatedWith (a0 , software_agent , None , None ,
317
- {pm .PROV ["Role" ]: nipype_ns ["Software" ]})
354
+ {pm .PROV ["hadRole" ]: nipype_ns ["LoggedInUser" ]})
355
+ self .g .wasAssociatedWith (a0 , software_agent )
318
356
return self .g
319
357
320
358
def write_provenance (self , filename = 'provenance' , format = 'turtle' ):
0 commit comments