5
5
from socket import getfqdn
6
6
from uuid import uuid1
7
7
8
+ import numpy as np
9
+ try :
10
+ from collections import OrderedDict
11
+ except ImportError :
12
+ from ordereddict import OrderedDict
13
+
8
14
try :
9
15
import prov .model as pm
10
16
except ImportError :
11
17
from ..external import provcopy as pm
12
18
13
19
from .. import get_info
20
+ from .filemanip import (md5 , hash_infile )
14
21
from .. import logging
15
22
iflogger = logging .getLogger ('interface' )
16
23
17
24
foaf = pm .Namespace ("foaf" , "http://xmlns.com/foaf/0.1/" )
18
25
dcterms = pm .Namespace ("dcterms" , "http://purl.org/dc/terms/" )
19
26
nipype_ns = pm .Namespace ("nipype" , "http://nipy.org/nipype/terms/" )
20
- niiri = pm .Namespace ("niiri" , "http://nidm .nidash.org/iri /" )
27
+ niiri = pm .Namespace ("niiri" , "http://iri .nidash.org/" )
21
28
22
29
get_id = lambda : niiri [uuid1 ().hex ]
23
30
31
+ def get_attr_id (attr , skip = None ):
32
+ dictwithhash , hashval = get_hashval (attr , skip = None )
33
+ return niiri [hashval ]
34
+
24
35
max_text_len = 1024000
25
36
26
- def safe_encode (x ):
37
+ def get_hashval (inputdict , skip = None ):
38
+ """Return a dictionary of our items with hashes for each file.
39
+
40
+ Searches through dictionary items and if an item is a file, it
41
+ calculates the md5 hash of the file contents and stores the
42
+ file name and hash value as the new key value.
43
+
44
+ However, the overall bunch hash is calculated only on the hash
45
+ value of a file. The path and name of the file are not used in
46
+ the overall hash calculation.
47
+
48
+ Returns
49
+ -------
50
+ dict_withhash : dict
51
+ Copy of our dictionary with the new file hashes included
52
+ with each file.
53
+ hashvalue : str
54
+ The md5 hash value of the traited spec
55
+
56
+ """
57
+
58
+ dict_withhash = {}
59
+ dict_nofilename = OrderedDict ()
60
+ for name , val in sorted (inputdict .items ()):
61
+ if skip is not None and name in skip :
62
+ continue
63
+ outname = name .get_uri ()
64
+ if isinstance (val , pm .QName ):
65
+ val = val .get_uri ()
66
+ if isinstance (val , pm .Literal ):
67
+ val = val .get_value ()
68
+ dict_nofilename [outname ] = _get_sorteddict (val )
69
+ dict_withhash [outname ] = _get_sorteddict (val , True )
70
+ return (dict_withhash , md5 (str (dict_nofilename )).hexdigest ())
71
+
72
+ def _get_sorteddict (object , dictwithhash = False ):
73
+ if isinstance (object , dict ):
74
+ out = OrderedDict ()
75
+ for key , val in sorted (object .items ()):
76
+ if val :
77
+ out [key ] = _get_sorteddict (val , dictwithhash )
78
+ elif isinstance (object , (list , tuple )):
79
+ out = []
80
+ for val in object :
81
+ if val :
82
+ out .append (_get_sorteddict (val , dictwithhash ))
83
+ if isinstance (object , tuple ):
84
+ out = tuple (out )
85
+ else :
86
+ if isinstance (object , str ) and os .path .isfile (object ):
87
+ hash = hash_infile (object )
88
+ if dictwithhash :
89
+ out = (object , hash )
90
+ else :
91
+ out = hash
92
+ elif isinstance (object , float ):
93
+ out = '%.10f' % object
94
+ else :
95
+ out = object
96
+ return out
97
+
98
+
99
+ def safe_encode (x , as_literal = True ):
27
100
"""Encodes a python value for prov
28
101
"""
29
102
if x is None :
30
- return pm .Literal ("Unknown" , pm .XSD ['string' ])
103
+ value = "Unknown"
104
+ if as_literal :
105
+ return pm .Literal (value , pm .XSD ['string' ])
106
+ else :
107
+ return value
31
108
try :
32
109
if isinstance (x , (str , unicode )):
33
- if os .path .exists (x ):
110
+ if os .path .exists (x ) and not os .path .isdir (x ):
111
+ value = 'file://%s%s' % (getfqdn (), x )
112
+ if not as_literal :
113
+ return value
34
114
try :
35
- return pm .URIRef ('file://%s%s' % ( getfqdn (), x ) )
115
+ return pm .URIRef (value )
36
116
except AttributeError :
37
- return pm .Literal ('file://%s%s' % (getfqdn (), x ),
38
- pm .XSD ['anyURI' ])
117
+ return pm .Literal (value , pm .XSD ['anyURI' ])
39
118
else :
40
119
if len (x ) > max_text_len :
41
- return pm .Literal (x [:max_text_len - 13 ] + ['...Clipped...' ],
42
- pm .XSD ['string' ])
120
+ value = x [:max_text_len - 13 ] + ['...Clipped...' ]
43
121
else :
44
- return pm .Literal (x , pm .XSD ['string' ])
122
+ value = x
123
+ if not as_literal :
124
+ return value
125
+ return pm .Literal (value , pm .XSD ['string' ])
45
126
if isinstance (x , (int ,)):
127
+ if not as_literal :
128
+ return x
46
129
return pm .Literal (int (x ), pm .XSD ['integer' ])
47
130
if isinstance (x , (float ,)):
131
+ if not as_literal :
132
+ return x
48
133
return pm .Literal (x , pm .XSD ['float' ])
49
134
if isinstance (x , dict ):
50
135
outdict = {}
51
136
for key , value in x .items ():
52
- encoded_value = safe_encode (value )
137
+ encoded_value = safe_encode (value , as_literal = False )
53
138
if isinstance (encoded_value , (pm .Literal ,)):
54
139
outdict [key ] = encoded_value .json_representation ()
55
140
else :
56
141
outdict [key ] = encoded_value
142
+ if not as_literal :
143
+ return json .dumps (outdict )
57
144
return pm .Literal (json .dumps (outdict ), pm .XSD ['string' ])
58
145
if isinstance (x , list ):
59
- outlist = []
60
- for value in x :
61
- encoded_value = safe_encode (value )
62
- if isinstance (encoded_value , (pm .Literal ,)):
63
- outlist .append (encoded_value .json_representation ())
64
- else :
65
- outlist .append (encoded_value )
146
+ try :
147
+ nptype = np .array (x ).dtype
148
+ if nptype == np .dtype (object ):
149
+ raise ValueError ('dtype object' )
150
+ except ValueError , e :
151
+ outlist = []
152
+ for value in x :
153
+ encoded_value = safe_encode (value , as_literal = False )
154
+ if isinstance (encoded_value , (pm .Literal ,)):
155
+ outlist .append (encoded_value .json_representation ())
156
+ else :
157
+ outlist .append (encoded_value )
158
+ else :
159
+ outlist = x
160
+ if not as_literal :
161
+ return json .dumps (outlist )
66
162
return pm .Literal (json .dumps (outlist ), pm .XSD ['string' ])
163
+ if not as_literal :
164
+ return dumps (x )
67
165
return pm .Literal (dumps (x ), nipype_ns ['pickle' ])
68
166
except TypeError , e :
69
167
iflogger .info (e )
70
- return pm .Literal ("Could not encode: " + str (e ), pm .XSD ['string' ])
168
+ value = "Could not encode: " + str (e )
169
+ if not as_literal :
170
+ return value
171
+ return pm .Literal (value , pm .XSD ['string' ])
71
172
72
173
73
174
def write_provenance (results , filename = 'provenance' , format = 'turtle' ):
@@ -95,14 +196,14 @@ def add_results(self, results):
95
196
interface = results .interface
96
197
inputs = results .inputs
97
198
outputs = results .outputs
98
- classname = interface .__class__ . __name__
199
+ classname = interface .__name__
99
200
100
- a0_attrs = {nipype_ns ['module' ]: self .__module__ ,
201
+ a0_attrs = {nipype_ns ['module' ]: interface .__module__ ,
101
202
nipype_ns ["interface" ]: classname ,
102
203
pm .PROV ["label" ]: classname ,
103
204
nipype_ns ['duration' ]: safe_encode (runtime .duration ),
104
205
nipype_ns ['working_directory' ]: safe_encode (runtime .cwd ),
105
- nipype_ns ['return_code' ]: runtime .returncode ,
206
+ nipype_ns ['return_code' ]: safe_encode ( runtime .returncode ) ,
106
207
nipype_ns ['platform' ]: safe_encode (runtime .platform ),
107
208
nipype_ns ['version' ]: safe_encode (runtime .version ),
108
209
}
@@ -131,10 +232,17 @@ def add_results(self, results):
131
232
self .g .used (a0 , id )
132
233
# write environment entities
133
234
for idx , (key , val ) in enumerate (sorted (runtime .environ .items ())):
235
+ if key not in ['PATH' , 'FSLDIR' , 'FREESURFER_HOME' , 'ANTSPATH' ,
236
+ 'CAMINOPATH' , 'CLASSPATH' , 'LD_LIBRARY_PATH' ,
237
+ 'DYLD_LIBRARY_PATH' , 'FIX_VERTEX_AREA' ,
238
+ 'FSF_OUTPUT_FORMAT' , 'FSLCONFDIR' , 'FSLOUTPUTTYPE' ,
239
+ 'LOGNAME' , 'USER' ,
240
+ 'MKL_NUM_THREADS' , 'OMP_NUM_THREADS' ]:
241
+ continue
134
242
in_attr = {pm .PROV ["label" ]: key ,
135
243
nipype_ns ["environment_variable" ]: key ,
136
244
nipype_ns ["value" ]: safe_encode (val )}
137
- id = get_id ( )
245
+ id = get_attr_id ( in_attr )
138
246
self .g .entity (id , in_attr )
139
247
self .g .hadMember (env_collection , id )
140
248
# write input entities
@@ -150,7 +258,7 @@ def add_results(self, results):
150
258
in_attr = {pm .PROV ["label" ]: key ,
151
259
nipype_ns ["in_port" ]: key ,
152
260
nipype_ns ["value" ]: safe_encode (val )}
153
- id = get_id ( )
261
+ id = get_attr_id ( in_attr )
154
262
self .g .entity (id , in_attr )
155
263
self .g .hadMember (input_collection , id )
156
264
# write output entities
@@ -164,12 +272,12 @@ def add_results(self, results):
164
272
pm .PROV ['label' ]:
165
273
"Outputs" })
166
274
self .g .wasGeneratedBy (output_collection , a0 )
167
- # write input entities
275
+ # write output entities
168
276
for idx , (key , val ) in enumerate (sorted (outputs .items ())):
169
277
out_attr = {pm .PROV ["label" ]: key ,
170
278
nipype_ns ["out_port" ]: key ,
171
279
nipype_ns ["value" ]: safe_encode (val )}
172
- id = get_id ( )
280
+ id = get_attr_id ( out_attr )
173
281
self .g .entity (id , out_attr )
174
282
self .g .hadMember (output_collection , id )
175
283
# write runtime entities
@@ -190,19 +298,19 @@ def add_results(self, results):
190
298
id = get_id ()
191
299
self .g .entity (get_id (), attr )
192
300
self .g .hadMember (runtime_collection , id )
301
+
193
302
# create agents
194
- user_agent = self .g .agent (get_id (),
195
- {pm .PROV ["type" ]: pm .PROV ["Person" ],
196
- pm .PROV ["label" ]:
197
- pwd .getpwuid (os .geteuid ()).pw_name ,
198
- foaf ["name" ]:
199
- safe_encode (pwd .getpwuid (os .geteuid ()).pw_name )})
303
+ user_attr = {pm .PROV ["type" ]: pm .PROV ["Person" ],
304
+ pm .PROV ["label" ]: pwd .getpwuid (os .geteuid ()).pw_name ,
305
+ foaf ["name" ]:
306
+ safe_encode (pwd .getpwuid (os .geteuid ()).pw_name )}
307
+ user_agent = self .g .agent (get_attr_id (user_attr ), user_attr )
200
308
agent_attr = {pm .PROV ["type" ]: pm .PROV ["SoftwareAgent" ],
201
309
pm .PROV ["label" ]: "Nipype" ,
202
310
foaf ["name" ]: safe_encode ("Nipype" )}
203
311
for key , value in get_info ().items ():
204
312
agent_attr .update ({nipype_ns [key ]: safe_encode (value )})
205
- software_agent = self .g .agent (get_id ( ), agent_attr )
313
+ software_agent = self .g .agent (get_attr_id ( agent_attr ), agent_attr )
206
314
self .g .wasAssociatedWith (a0 , user_agent , None , None ,
207
315
{pm .PROV ["Role" ]: nipype_ns ["LoggedInUser" ]})
208
316
self .g .wasAssociatedWith (a0 , software_agent , None , None ,
0 commit comments