Skip to content

Commit eece2b2

Browse files
committed
Merge pull request #835 from satra/fix/prov
Fix/prov
2 parents 918da62 + c376a2d commit eece2b2

File tree

2 files changed

+81
-56
lines changed

2 files changed

+81
-56
lines changed

nipype/external/provcopy.py

Lines changed: 78 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,13 @@ def get_anon_id(self, obj, local_prefix="id"):
262262
class Literal(object):
263263
def __init__(self, value, datatype=None, langtag=None):
    """Store the literal's value, datatype and optional language tag.

    When a language tag is given, the datatype is forced to
    PROV["InternationalizedString"]: a missing datatype is filled in
    (logged at debug level) and any other datatype is overridden
    (logged as a warning).

    :param value: the literal's value.
    :param datatype: the literal's datatype, or None.
    :param langtag: the language tag, or a falsy value for none.
    """
    self._value = value
    if langtag:
        if datatype is None:
            logger.debug('Assuming prov:InternationalizedString as the type of "%s"@%s', value, langtag)
            datatype = PROV["InternationalizedString"]
        elif datatype != PROV["InternationalizedString"]:
            # The message has three placeholders; the rejected datatype must
            # be supplied first, otherwise formatting fails with TypeError.
            logger.warning('Invalid data type (%s) for "%s"@%s, overridden as prov:InternationalizedString.', datatype, value, langtag)
            datatype = PROV["InternationalizedString"]
    self._datatype = datatype
    self._langtag = langtag
267274

@@ -291,14 +298,14 @@ def has_no_langtag(self):
291298

292299
def provn_representation(self):
    """Return the PROV-N text for this literal.

    The value is first passed through
    _ensure_multiline_string_triple_quoted(). With a language tag the
    result is ``<value>@<langtag>``; otherwise it is
    ``<value> %% <datatype>``.
    """
    quoted_value = _ensure_multiline_string_triple_quoted(self._value)
    if not self._langtag:
        return u'%s %%%% %s' % (quoted_value, unicode(self._datatype))
    # a language tag can only go with prov:InternationalizedString
    return u'%s@%s' % (quoted_value, unicode(self._langtag))
298305

299306
def json_representation(self):
300307
if self._langtag:
301-
# a langtag can only goes with string
308+
# a language tag can only go with prov:InternationalizedString
302309
return {'$': unicode(self._value), 'lang': self._langtag}
303310
else:
304311
if isinstance(self._datatype, QName):
@@ -424,6 +431,9 @@ def __init__(self, record_type, attribute_id):
424431
self.attribute_id = attribute_id
425432
self.args += (PROV_N_MAP[record_type], attribute_id)
426433

434+
def __str__(self):
    """Name the missing required attribute and the record type it belongs to."""
    missing_attribute = PROV_ID_ATTRIBUTES_MAP[self.attribute_id]
    record_name = PROV_N_MAP[self.record_type]
    return 'Missing the required attribute "%s" in %s' % (missing_attribute, record_name)
436+
427437

428438
class ProvExceptionNotValidAttribute(ProvException):
429439
def __init__(self, record_type, attribute, attribute_types):
@@ -432,6 +442,9 @@ def __init__(self, record_type, attribute, attribute_types):
432442
self.attribute_types = attribute_types
433443
self.args += (PROV_N_MAP[record_type], unicode(attribute), attribute_types)
434444

445+
def __str__(self):
    """Report the rejected attribute value together with the expected types."""
    details = (self.attribute, self.attribute_types)
    return 'Invalid attribute value: %s. %s expected' % details
447+
435448

436449
class ProvExceptionCannotUnifyAttribute(ProvException):
437450
def __init__(self, identifier, record_type1, record_type2):
@@ -440,6 +453,9 @@ def __init__(self, identifier, record_type1, record_type2):
440453
self.record_type2 = record_type2
441454
self.args += (identifier, PROV_N_MAP[record_type1], PROV_N_MAP[record_type2])
442455

456+
def __str__(self):
    """Describe the two record types that clash on one identifier."""
    # Arguments follow the placeholder order: first type, second type, and
    # only then the shared identifier.
    return 'Cannot unify two records of type %s and %s with same identifier (%s)' % (
        PROV_N_MAP[self.record_type1],
        PROV_N_MAP[self.record_type2],
        self.identifier)
458+
443459

444460
class ProvExceptionContraint(ProvException):
445461
def __init__(self, record_type, attribute1, attribute2, msg):
@@ -517,12 +533,10 @@ def get_value(self):
517533
def _auto_literal_conversion(self, literal):
518534
'''This method normalise datatype for literals
519535
'''
536+
if isinstance(literal, URIRef):
537+
return literal
538+
520539
if isinstance(literal, basestring):
521-
# try if this is a QName
522-
qname = self._bundle.valid_identifier(literal)
523-
if isinstance(qname, QName):
524-
return qname
525-
# if not a QName, convert all strings to unicode
526540
return unicode(literal)
527541

528542
if isinstance(literal, Literal) and literal.has_no_langtag():
@@ -539,7 +553,9 @@ def parse_extra_attributes(self, extra_attributes):
539553
if isinstance(extra_attributes, dict):
540554
# Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
541555
extra_attributes = extra_attributes.items()
542-
attr_set = set((self._bundle.valid_identifier(attribute), self._auto_literal_conversion(value)) for attribute, value in extra_attributes)
556+
attr_set = set((self._bundle.valid_identifier(attribute),
557+
self._auto_literal_conversion(value))
558+
for attribute, value in extra_attributes)
543559
return attr_set
544560

545561
def add_extra_attributes(self, extra_attributes):
@@ -653,7 +669,7 @@ def optional_attribute(self, attributes, attribute_id, attribute_types):
653669
return self._validate_attribute(attribute, attribute_types)
654670

655671
def __eq__(self, other):
656-
if self.__class__ != other.__class__:
672+
if self.get_prov_type() != other.get_prov_type():
657673
return False
658674
if self._identifier and not (self._identifier == other._identifier):
659675
return False
@@ -682,6 +698,13 @@ def __eq__(self, other):
682698
sattr = sorted(self._extra_attributes, key=_normalise_attributes) if self._extra_attributes else None
683699
oattr = sorted(other._extra_attributes, key=_normalise_attributes) if other._extra_attributes else None
684700
if sattr != oattr:
701+
if logger.isEnabledFor(logging.DEBUG):
702+
for spair, opair in zip(sattr, oattr):
703+
# Log the first unequal pair of attributes
704+
if spair != opair:
705+
logger.debug("Equality (ProvRecord): unequal attribute-value pairs - %s = %s - %s = %s",
706+
spair[0], spair[1], opair[0], opair[1])
707+
break
685708
return False
686709
return True
687710

@@ -740,17 +763,28 @@ def rdf(self, graph=None, subj=None):
740763
graph.add((subj, pred, obj))
741764
if self._extra_attributes:
742765
for (attr, value) in self._extra_attributes:
766+
try:
767+
# try whether an RDF representation is defined
768+
obj = value.rdf_representation()
769+
except Exception, e:
770+
obj = RDFLiteral(value)
771+
if attr == PROV['location']:
772+
pred = PROV['atLocation'].rdf_representation()
773+
if isinstance(value, (URIRef, QName)):
774+
if isinstance(value, QName):
775+
value = URIRef(value.get_uri())
776+
graph.add((subj, pred, value))
777+
graph.add((value, RDF.type,
778+
PROV['Location'].rdf_representation()))
779+
else:
780+
graph.add((subj, pred, obj))
781+
continue
743782
if attr == PROV['type']:
744783
pred = RDF.type
745784
elif attr == PROV['label']:
746785
pred = RDFS.label
747786
else:
748787
pred = attr.rdf_representation()
749-
try:
750-
# try if there is a RDF representation defined
751-
obj = value.rdf_representation()
752-
except Exception, e:
753-
obj = RDFLiteral(value)
754788
graph.add((subj, pred, obj))
755789
return graph
756790

@@ -853,8 +887,7 @@ def add_attributes(self, attributes, extra_attributes):
853887
startTime = self.optional_attribute(attributes, PROV_ATTR_STARTTIME, datetime.datetime)
854888
endTime = self.optional_attribute(attributes, PROV_ATTR_ENDTIME, datetime.datetime)
855889
if startTime and endTime and startTime > endTime:
856-
# TODO Raise logic exception here
857-
pass
890+
raise ValueError('StartTime %s > EndTime %s' % (startTime, endTime))
858891
attributes = OrderedDict()
859892
attributes[PROV_ATTR_STARTTIME] = startTime
860893
attributes[PROV_ATTR_ENDTIME] = endTime
@@ -1337,8 +1370,8 @@ def get_valid_identifier(self, identifier):
13371370
# create and return an identifier in the default namespace
13381371
return self._default[identifier]
13391372
else:
1340-
# TODO Should an exception raised here
1341-
return Identifier(identifier)
1373+
# This is not an identifier
1374+
return None
13421375

13431376
def get_anonymous_identifier(self, local_prefix='id'):
13441377
self._anon_id_count += 1
@@ -1362,11 +1395,7 @@ def __init__(self, bundle=None, identifier=None, attributes=None, other_attribut
13621395
self._records = list()
13631396
self._id_map = dict()
13641397
self._bundles = dict()
1365-
if bundle is None:
1366-
self._namespaces = NamespaceManager(namespaces)
1367-
else:
1368-
self._namespaces = bundle._namespaces
1369-
self._namespaces.add_namespaces(namespaces)
1398+
self._namespaces = NamespaceManager(namespaces, parent=(bundle._namespaces if bundle is not None else None))
13701399

13711400
# Initializing record-specific attributes
13721401
super(ProvBundle, self).__init__(bundle, identifier, attributes, other_attributes, asserted)
@@ -1379,9 +1408,6 @@ def get_default_namespace(self):
13791408
return self._namespaces.get_default_namespace()
13801409

13811410
def add_namespace(self, namespace_or_prefix, uri=None):
1382-
if self._bundle is not None: # This is a bundle
1383-
logger.warn("Namespace cannot be added into a bundle. It will be added to the document instead.")
1384-
13851411
if uri is None:
13861412
self._namespaces.add_namespace(namespace_or_prefix)
13871413
else:
@@ -1398,10 +1424,12 @@ def get_anon_id(self, record):
13981424
return self._namespaces.get_anonymous_identifier()
13991425

14001426
def get_records(self, class_or_type_or_tuple=None):
    """Return the asserted records held in ``self._records``.

    :param class_or_type_or_tuple: a class, type, or tuple of them, used
        with isinstance() to narrow the result. When falsy (the default
        None) no class filtering is applied.
    :return: a new list of the matching records, in stored order.
    """
    # Only returning asserted records
    asserted = [rec for rec in self._records if rec.is_asserted()]
    if not class_or_type_or_tuple:
        return asserted
    # A comprehension keeps the return type a list on every branch;
    # filter() would return a lazy iterator on Python 3.
    return [rec for rec in asserted if isinstance(rec, class_or_type_or_tuple)]
14051433

14061434
def get_record(self, identifier):
14071435
if identifier is None:
@@ -1453,22 +1481,21 @@ def _encode_json_representation(self, value):
14531481
return value
14541482

14551483
def _decode_json_representation(self, literal):
    """Decode one JSON-encoded value.

    Non-dict input is returned unchanged. A dict must carry the value under
    ``'$'`` and may carry ``'type'`` and ``'lang'``:

    * type ``xsd:anyURI`` gives ``Identifier(value)``;
    * type ``xsd:QName`` gives ``self.valid_identifier(value)``;
    * anything else gives a ``Literal`` built from the value, the datatype
      resolved through ``self.valid_identifier()`` and the language tag.
    """
    if not isinstance(literal, dict):
        # simple type, just return it
        return literal

    # complex type
    value = literal['$']
    datatype = literal.get('type')
    langtag = literal.get('lang')

    if datatype == u'xsd:anyURI':
        return Identifier(value)
    if datatype == u'xsd:QName':
        return self.valid_identifier(value)

    # The literal of standard Python types is not converted here
    # It will be automatically converted when added to a record by _auto_literal_conversion()
    return Literal(value, self.valid_identifier(datatype), langtag)
14731500

14741501
def _encode_JSON_container(self):
@@ -1701,12 +1728,10 @@ def get_flattened(self):
17011728
return document
17021729

17031730
def __eq__(self, other):
1704-
try:
1705-
other_records = set(other._records)
1706-
except:
1707-
# other is not a bundle
1731+
if not isinstance(other, ProvBundle):
17081732
return False
1709-
this_records = set(self._records)
1733+
other_records = set(other.get_records())
1734+
this_records = set(self.get_records())
17101735
if len(this_records) != len(other_records):
17111736
return False
17121737
# check all records for equality
@@ -1721,12 +1746,12 @@ def __eq__(self, other):
17211746
other_records.remove(record_b)
17221747
continue
17231748
else:
1724-
logger.debug("Unequal PROV records:")
1725-
logger.debug("%s" % unicode(record_a))
1726-
logger.debug("%s" % unicode(record_b))
1749+
logger.debug("Equality (ProvBundle): Unequal PROV records:")
1750+
logger.debug("%s", unicode(record_a))
1751+
logger.debug("%s", unicode(record_b))
17271752
return False
17281753
else:
1729-
logger.debug("Could not find a record with this identifier: %s" % unicode(record_a._identifier))
1754+
logger.debug("Equality (ProvBundle): Could not find a record with this identifier: %s", unicode(record_a._identifier))
17301755
return False
17311756
else:
17321757
# Manually look for the record
@@ -1737,7 +1762,7 @@ def __eq__(self, other):
17371762
found = True
17381763
break
17391764
if not found:
1740-
logger.debug("Could not find this record: %s" % unicode(record_a))
1765+
logger.debug("Equality (ProvBundle): Could not find this record: %s", unicode(record_a))
17411766
return False
17421767
return True
17431768

nipype/utils/provenance.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -214,15 +214,15 @@ def prov_encode(graph, value, create_container=True):
214214
encoded_literal = safe_encode(value)
215215
attr = {pm.PROV['value']: encoded_literal}
216216
if isinstance(value, basestring) and os.path.exists(value):
217-
attr.update({pm.PROV['Location']: encoded_literal})
217+
attr.update({pm.PROV['location']: encoded_literal})
218218
if not os.path.isdir(value):
219219
sha512 = hash_infile(value, crypto=hashlib.sha512)
220220
attr.update({crypto['sha512']: pm.Literal(sha512,
221221
pm.XSD['string'])})
222-
id = get_attr_id(attr, skip=[pm.PROV['Location'],
222+
id = get_attr_id(attr, skip=[pm.PROV['location'],
223223
pm.PROV['value']])
224224
else:
225-
id = get_attr_id(attr, skip=[pm.PROV['Location']])
225+
id = get_attr_id(attr, skip=[pm.PROV['location']])
226226
else:
227227
id = get_attr_id(attr)
228228
entity = graph.entity(id, attr)

0 commit comments

Comments
 (0)