Skip to content

Commit db1fb83

Browse files
committed
Merge pull request #679 from satra/fix/prov
refactors provenance so that it needs enabling via config
2 parents cd857b5 + 373341c commit db1fb83

File tree

13 files changed

+374
-427
lines changed

13 files changed

+374
-427
lines changed

doc/devel/provenance.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,9 @@ write out a provenance of the workflow if instructed.
1818

1919
This is very much an experimental feature as we continue to refine how exactly
2020
the provenance should be stored and how such information can be used for
21-
reporting or reconstituting workflows.
21+
reporting or reconstituting workflows. By default provenance writing is disabled
22+
for the 0.9 release, to enable insert the following code at the top of your
23+
script::
24+
25+
>>> from nipype import config
26+
>>> config.enable_provenance()

examples/rsfmri_preprocessing.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353
from nipype.interfaces.base import CommandLine
5454
CommandLine.set_default_terminal_output('file')
5555

56+
from nipype import config
57+
config.enable_provenance()
58+
5659
from nipype import (ants, afni, fsl, freesurfer, nipy, Function, DataSink)
5760
from nipype import Workflow, Node, MapNode
5861

nipype/external/provcopy.py

Lines changed: 80 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
except ImportError:
2626
pass
2727

28-
from copy import deepcopy
28+
from copy import deepcopy, copy
2929

3030
try:
3131
from collections import OrderedDict
@@ -173,6 +173,12 @@
173173
PROV_ID_ATTRIBUTES_MAP = dict((prov_id, attribute) for (prov_id, attribute) in PROV_RECORD_ATTRIBUTES)
174174
PROV_ATTRIBUTES_ID_MAP = dict((attribute, prov_id) for (prov_id, attribute) in PROV_RECORD_ATTRIBUTES)
175175

176+
177+
# Converting an attribute to the normal form for comparison purposes
178+
_normalise_attributes = lambda attr: (unicode(attr[0]), unicode(attr[1]))
179+
180+
181+
# Datatypes
176182
_r_xsd_dateTime = re.compile(""" ^
177183
(?P<year>-?[0-9]{4}) - (?P<month>[0-9]{2}) - (?P<day>[0-9]{2})
178184
T (?P<hour>[0-9]{2}) : (?P<minute>[0-9]{2}) : (?P<second>[0-9]{2})
@@ -181,29 +187,23 @@
181187
Z | (?P<tz_hr>[-+][0-9]{2}) : (?P<tz_min>[0-9]{2})
182188
)?
183189
$ """, re.X)
184-
_r_typed_literal_uri = re.compile(r'^"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\^\^<(?P<datatype>[^>\\]*(?:\\.[^>\\]*)*)>$', re.X)
185-
_r_typed_literal_qname = re.compile(r'^"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\^\^(?P<datatype>[^>\\]*(?:\\.[^>\\]*)*)$', re.X)
186190

187191
attr2rdf = lambda attr: PROV[PROV_ID_ATTRIBUTES_MAP[attr].split('prov:')[1]].rdf_representation()
188192

189-
# Converting an attribute to the normal form for comparison purposes
190-
_normalise_attributes = lambda attr: (unicode(attr[0]), unicode(attr[1]))
191-
192-
# Datatypes
193193
def _parse_xsd_dateTime(s):
194194
"""Returns datetime or None."""
195195
m = _r_xsd_dateTime.match(s)
196196
if m is not None:
197197
values = m.groupdict()
198-
if values["microsecond"] is None:
199-
values["microsecond"] = 0
198+
if values["microsecond"] is None:
199+
values["microsecond"] = 0
200+
else:
201+
values["microsecond"] = values["microsecond"][1:]
202+
values["microsecond"] += "0" * (6 - len(values["microsecond"]))
203+
values = dict((k, int(v)) for k, v in values.iteritems() if not k.startswith("tz"))
204+
return datetime.datetime(**values)
200205
else:
201-
values["microsecond"] = values["microsecond"][1:]
202-
values["microsecond"] += "0" * (6 - len(values["microsecond"]))
203-
values = dict((k, int(v)) for k, v in values.iteritems()
204-
if not k.startswith("tz"))
205-
206-
return datetime.datetime(**values)
206+
return None
207207

208208

209209
def _ensure_datetime(time):
@@ -251,9 +251,16 @@ def parse_xsd_types(value, datatype):
251251
return XSD_DATATYPE_PARSERS[datatype](value) if datatype in XSD_DATATYPE_PARSERS else None
252252

253253

254+
def _ensure_multiline_string_triple_quoted(s):
255+
format_str = '%s'
256+
if isinstance(s, basestring):
257+
format_str = u'"""%s"""' if '\n' in s else u'"%s"'
258+
return format_str % s
259+
260+
254261
def encoding_PROV_N_value(value):
255262
if isinstance(value, basestring):
256-
return '"%s"' % value
263+
return _ensure_multiline_string_triple_quoted(value)
257264
elif isinstance(value, datetime.datetime):
258265
return value.isoformat()
259266
elif isinstance(value, float):
@@ -304,9 +311,9 @@ def has_no_langtag(self):
304311
def provn_representation(self):
305312
if self._langtag:
306313
# a langtag can only goes with string
307-
return u'"%s"@%s' % (unicode(self._value), unicode(self._langtag))
314+
return u'%s@%s' % (_ensure_multiline_string_triple_quoted(self._value), unicode(self._langtag))
308315
else:
309-
return u'"%s" %%%% %s' % (unicode(self._value), unicode(self._datatype))
316+
return u'%s %%%% %s' % (_ensure_multiline_string_triple_quoted(self._value), unicode(self._datatype))
310317

311318
def json_representation(self):
312319
if self._langtag:
@@ -494,7 +501,7 @@ def add_asserted_type(self, type_identifier):
494501
if type_identifier not in asserted_types:
495502
if self._extra_attributes is None:
496503
self._extra_attributes = set()
497-
self._extra_attributes.update(set([(PROV['type'], type_identifier)]))
504+
self._extra_attributes.add((PROV['type'], type_identifier))
498505

499506
def get_attribute(self, attr_name):
500507
if not self._extra_attributes:
@@ -542,19 +549,18 @@ def _auto_literal_conversion(self, literal):
542549

543550
def parse_extra_attributes(self, extra_attributes):
544551
if isinstance(extra_attributes, dict):
545-
# This will only work if extra_attributes is a dictionary
546552
# Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
547553
extra_attributes = extra_attributes.items()
548-
attr_list = set((self._bundle.valid_identifier(attribute), self._auto_literal_conversion(value)) for attribute, value in extra_attributes)
549-
return attr_list
554+
attr_set = set((self._bundle.valid_identifier(attribute), self._auto_literal_conversion(value)) for attribute, value in extra_attributes)
555+
return attr_set
550556

551557
def add_extra_attributes(self, extra_attributes):
552558
if extra_attributes:
553559
if self._extra_attributes is None:
554560
self._extra_attributes = set()
555-
attr_list = self.parse_extra_attributes(extra_attributes)
556561
# Check attributes for valid qualified names
557-
self._extra_attributes.update(attr_list)
562+
attr_set = self.parse_extra_attributes(extra_attributes)
563+
self._extra_attributes.update(attr_set)
558564

559565
def add_attributes(self, attributes, extra_attributes):
560566
if attributes:
@@ -1219,11 +1225,12 @@ def add_attributes(self, attributes, extra_attributes):
12191225

12201226
# Bundle
12211227
class NamespaceManager(dict):
1222-
def __init__(self, default_namespaces={PROV.get_prefix(): PROV, XSD.get_prefix(): XSD}, default=None, parent=None):
1228+
def __init__(self, namespaces={}, default_namespaces={PROV.get_prefix(): PROV, XSD.get_prefix(): XSD}, default=None, parent=None):
12231229
self._default_namespaces = {}
12241230
self._default_namespaces.update(default_namespaces)
1225-
self._namespaces = {}
12261231
self.update(self._default_namespaces)
1232+
self._namespaces = {}
1233+
12271234
if default is not None:
12281235
self.set_default_namespace(default)
12291236
else:
@@ -1232,6 +1239,7 @@ def __init__(self, default_namespaces={PROV.get_prefix(): PROV, XSD.get_prefix()
12321239
# TODO check if default is in the default namespaces
12331240
self._anon_id_count = 0
12341241
self._rename_map = {}
1242+
self.add_namespaces(namespaces)
12351243

12361244
def get_namespace(self, uri):
12371245
for namespace in self.values():
@@ -1267,18 +1275,35 @@ def add_namespace(self, namespace):
12671275
namespace = new_namespace
12681276
self._namespaces[prefix] = namespace
12691277
self[prefix] = namespace
1278+
return namespace
1279+
1280+
def add_namespaces(self, namespaces):
1281+
if namespaces:
1282+
for prefix, uri in namespaces.items():
1283+
ns = Namespace(prefix, uri)
1284+
self.add_namespace(ns)
12701285

12711286
def get_valid_identifier(self, identifier):
12721287
if not identifier:
12731288
return None
12741289
if isinstance(identifier, Identifier):
12751290
if isinstance(identifier, QName):
12761291
# Register the namespace if it has not been registered before
1277-
namespace = identifier.get_namespace()
1278-
if namespace not in self.values():
1279-
self.add_namespace(namespace)
1280-
# return the original identifier
1281-
return identifier
1292+
namespace = identifier._namespace
1293+
prefix = namespace.get_prefix()
1294+
if prefix in self and self[prefix] == namespace:
1295+
# No need to add the namespace
1296+
existing_ns = self[prefix]
1297+
if existing_ns is namespace:
1298+
return identifier
1299+
else:
1300+
return existing_ns[identifier._localpart] # reuse the existing namespace
1301+
else:
1302+
ns = self.add_namespace(deepcopy(namespace)) # Do not reuse the namespace object
1303+
return ns[identifier._localpart]
1304+
else:
1305+
# return the original identifier
1306+
return identifier
12821307
elif isinstance(identifier, (str, unicode)):
12831308
if identifier.startswith('_:'):
12841309
return None
@@ -1325,15 +1350,16 @@ def _get_unused_prefix(self, original_prefix):
13251350

13261351

13271352
class ProvBundle(ProvEntity):
1328-
def __init__(self, bundle=None, identifier=None, attributes=None, other_attributes=None, asserted=True):
1353+
def __init__(self, bundle=None, identifier=None, attributes=None, other_attributes=None, asserted=True, namespaces={}):
13291354
# Initializing bundle-specific attributes
13301355
self._records = list()
13311356
self._id_map = dict()
13321357
self._bundles = dict()
13331358
if bundle is None:
1334-
self._namespaces = NamespaceManager()
1359+
self._namespaces = NamespaceManager(namespaces)
13351360
else:
13361361
self._namespaces = bundle._namespaces
1362+
self._namespaces.add_namespaces(namespaces)
13371363

13381364
# Initializing record-specific attributes
13391365
super(ProvBundle, self).__init__(bundle, identifier, attributes, other_attributes, asserted)
@@ -1631,14 +1657,28 @@ def from_provjson(json_content, **kw):
16311657
return json.loads(json_content, cls=ProvBundle.JSONDecoder, **kw)
16321658

16331659
def get_flattened(self):
1634-
flattened = deepcopy(self)
1635-
for bundle in flattened._bundles.values():
1660+
namespaces = dict((ns.get_prefix(), ns.get_uri()) for ns in self.get_registered_namespaces())
1661+
document = ProvBundle(namespaces=namespaces)
1662+
default_ns_uri = self.get_default_namespace()
1663+
if default_ns_uri is not None:
1664+
document.set_default_namespace(default_ns_uri)
1665+
# Enumerate records and bundles
1666+
bundles = []
1667+
records = []
1668+
for record in self.get_records():
1669+
if isinstance(record, ProvBundle):
1670+
bundles.append(record)
1671+
else:
1672+
records.append(record)
1673+
records = deepcopy(records)
1674+
for record in records:
1675+
document._add_record(record)
1676+
for bundle in bundles:
16361677
for record in bundle._records:
1637-
flattened.add_record(record.get_type(), record._identifier, record._attributes, record._extra_attributes, record._asserted)
1638-
flattened._records.remove(bundle)
1639-
1640-
flattened._bundles = {}
1641-
return flattened
1678+
document.add_record(record.get_type(), copy(record._identifier),
1679+
deepcopy(record._attributes), deepcopy(record._extra_attributes),
1680+
record._asserted)
1681+
return document
16421682

16431683
def __eq__(self, other):
16441684
try:

0 commit comments

Comments
 (0)