Skip to content

Commit bdf3ea1

Browse files
committed
fix: updated prov model
1 parent cba8e41 commit bdf3ea1

File tree

1 file changed

+76
-53
lines changed

1 file changed

+76
-53
lines changed

nipype/external/provcopy.py

Lines changed: 76 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,13 @@ def get_anon_id(self, obj, local_prefix="id"):
262262
class Literal(object):
263263
def __init__(self, value, datatype=None, langtag=None):
    """Create a literal from a value, an optional datatype and an optional language tag.

    When a language tag is given, the literal is always typed as
    prov:InternationalizedString: a missing datatype is filled in, and any
    other datatype is overridden (with a warning).

    :param value: the literal's value.
    :param datatype: the datatype of the literal, or None.
    :param langtag: the language tag of the literal, or None.
    """
    self._value = value
    if langtag:
        # a language tag can only go with prov:InternationalizedString
        if datatype is None:
            logger.debug('Assuming prov:InternationalizedString as the type of "%s"@%s', value, langtag)
            datatype = PROV["InternationalizedString"]
        elif datatype != PROV["InternationalizedString"]:
            # The message has three placeholders: the rejected datatype must be
            # passed along with the value and the language tag, otherwise the
            # formatting itself fails with a TypeError.
            logger.warning('Invalid data type (%s) for "%s"@%s, overridden as prov:InternationalizedString.',
                           datatype, value, langtag)
            datatype = PROV["InternationalizedString"]
    self._datatype = datatype
    self._langtag = langtag
267274

@@ -291,14 +298,14 @@ def has_no_langtag(self):
291298

292299
def provn_representation(self):
293300
if self._langtag:
294-
# a langtag can only goes with string
301+
# a language tag can only go with prov:InternationalizedString
295302
return u'%s@%s' % (_ensure_multiline_string_triple_quoted(self._value), unicode(self._langtag))
296303
else:
297304
return u'%s %%%% %s' % (_ensure_multiline_string_triple_quoted(self._value), unicode(self._datatype))
298305

299306
def json_representation(self):
300307
if self._langtag:
301-
# a langtag can only goes with string
308+
# a language tag can only go with prov:InternationalizedString
302309
return {'$': unicode(self._value), 'lang': self._langtag}
303310
else:
304311
if isinstance(self._datatype, QName):
@@ -424,6 +431,9 @@ def __init__(self, record_type, attribute_id):
424431
self.attribute_id = attribute_id
425432
self.args += (PROV_N_MAP[record_type], attribute_id)
426433

434+
def __str__(self):
    """Return a readable message naming the missing attribute and the record type."""
    attribute_name = PROV_ID_ATTRIBUTES_MAP[self.attribute_id]
    record_name = PROV_N_MAP[self.record_type]
    return 'Missing the required attribute "%s" in %s' % (attribute_name, record_name)
436+
427437

428438
class ProvExceptionNotValidAttribute(ProvException):
429439
def __init__(self, record_type, attribute, attribute_types):
@@ -432,6 +442,9 @@ def __init__(self, record_type, attribute, attribute_types):
432442
self.attribute_types = attribute_types
433443
self.args += (PROV_N_MAP[record_type], unicode(attribute), attribute_types)
434444

445+
def __str__(self):
    """Return a readable message with the rejected attribute and the expected types."""
    template = 'Invalid attribute value: %s. %s expected'
    return template % (self.attribute, self.attribute_types)
447+
435448

436449
class ProvExceptionCannotUnifyAttribute(ProvException):
437450
def __init__(self, identifier, record_type1, record_type2):
@@ -440,6 +453,9 @@ def __init__(self, identifier, record_type1, record_type2):
440453
self.record_type2 = record_type2
441454
self.args += (identifier, PROV_N_MAP[record_type1], PROV_N_MAP[record_type2])
442455

456+
def __str__(self):
    """Return a readable message naming the two record types and the shared identifier."""
    # Argument order must follow the placeholders: type, type, identifier.
    return 'Cannot unify two records of type %s and %s with same identifier (%s)' % (
        PROV_N_MAP[self.record_type1], PROV_N_MAP[self.record_type2], self.identifier)
458+
443459

444460
class ProvExceptionContraint(ProvException):
445461
def __init__(self, record_type, attribute1, attribute2, msg):
@@ -517,12 +533,10 @@ def get_value(self):
517533
def _auto_literal_conversion(self, literal):
518534
'''This method normalise datatype for literals
519535
'''
536+
if isinstance(literal, URIRef):
537+
return literal
538+
520539
if isinstance(literal, basestring):
521-
# try if this is a QName
522-
qname = self._bundle.valid_identifier(literal)
523-
if isinstance(qname, QName):
524-
return qname
525-
# if not a QName, convert all strings to unicode
526540
return unicode(literal)
527541

528542
if isinstance(literal, Literal) and literal.has_no_langtag():
@@ -539,7 +553,9 @@ def parse_extra_attributes(self, extra_attributes):
539553
if isinstance(extra_attributes, dict):
540554
# Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
541555
extra_attributes = extra_attributes.items()
542-
attr_set = set((self._bundle.valid_identifier(attribute), self._auto_literal_conversion(value)) for attribute, value in extra_attributes)
556+
attr_set = set((self._bundle.valid_identifier(attribute),
557+
self._auto_literal_conversion(value))
558+
for attribute, value in extra_attributes)
543559
return attr_set
544560

545561
def add_extra_attributes(self, extra_attributes):
@@ -653,7 +669,7 @@ def optional_attribute(self, attributes, attribute_id, attribute_types):
653669
return self._validate_attribute(attribute, attribute_types)
654670

655671
def __eq__(self, other):
656-
if self.__class__ != other.__class__:
672+
if self.get_prov_type() != other.get_prov_type():
657673
return False
658674
if self._identifier and not (self._identifier == other._identifier):
659675
return False
@@ -682,6 +698,13 @@ def __eq__(self, other):
682698
sattr = sorted(self._extra_attributes, key=_normalise_attributes) if self._extra_attributes else None
683699
oattr = sorted(other._extra_attributes, key=_normalise_attributes) if other._extra_attributes else None
684700
if sattr != oattr:
701+
if logger.isEnabledFor(logging.DEBUG):
702+
for spair, opair in zip(sattr, oattr):
703+
# Log the first unequal pair of attributes
704+
if spair != opair:
705+
logger.debug("Equality (ProvRecord): unequal attribute-value pairs - %s = %s - %s = %s",
706+
spair[0], spair[1], opair[0], opair[1])
707+
break
685708
return False
686709
return True
687710

@@ -740,17 +763,26 @@ def rdf(self, graph=None, subj=None):
740763
graph.add((subj, pred, obj))
741764
if self._extra_attributes:
742765
for (attr, value) in self._extra_attributes:
766+
try:
767+
# try if there is a RDF representation defined
768+
obj = value.rdf_representation()
769+
except Exception, e:
770+
obj = RDFLiteral(value)
771+
if attr == PROV['location']:
772+
pred = PROV['atLocation'].rdf_representation()
773+
if isinstance(value, URIRef):
774+
graph.add((subj, pred, value))
775+
graph.add((value, RDF.type,
776+
PROV['Location'].rdf_representation()))
777+
else:
778+
graph.add((subj, pred, obj))
779+
continue
743780
if attr == PROV['type']:
744781
pred = RDF.type
745782
elif attr == PROV['label']:
746783
pred = RDFS.label
747784
else:
748785
pred = attr.rdf_representation()
749-
try:
750-
# try if there is a RDF representation defined
751-
obj = value.rdf_representation()
752-
except Exception, e:
753-
obj = RDFLiteral(value)
754786
graph.add((subj, pred, obj))
755787
return graph
756788

@@ -853,8 +885,7 @@ def add_attributes(self, attributes, extra_attributes):
853885
startTime = self.optional_attribute(attributes, PROV_ATTR_STARTTIME, datetime.datetime)
854886
endTime = self.optional_attribute(attributes, PROV_ATTR_ENDTIME, datetime.datetime)
855887
if startTime and endTime and startTime > endTime:
856-
# TODO Raise logic exception here
857-
pass
888+
raise ValueError('StartTime %s > EndTime %s' % (startTime, endTime))
858889
attributes = OrderedDict()
859890
attributes[PROV_ATTR_STARTTIME] = startTime
860891
attributes[PROV_ATTR_ENDTIME] = endTime
@@ -1337,8 +1368,8 @@ def get_valid_identifier(self, identifier):
13371368
# create and return an identifier in the default namespace
13381369
return self._default[identifier]
13391370
else:
1340-
# TODO Should an exception raised here
1341-
return Identifier(identifier)
1371+
# This is not an identifier
1372+
return None
13421373

13431374
def get_anonymous_identifier(self, local_prefix='id'):
13441375
self._anon_id_count += 1
@@ -1362,11 +1393,7 @@ def __init__(self, bundle=None, identifier=None, attributes=None, other_attribut
13621393
self._records = list()
13631394
self._id_map = dict()
13641395
self._bundles = dict()
1365-
if bundle is None:
1366-
self._namespaces = NamespaceManager(namespaces)
1367-
else:
1368-
self._namespaces = bundle._namespaces
1369-
self._namespaces.add_namespaces(namespaces)
1396+
self._namespaces = NamespaceManager(namespaces, parent=(bundle._namespaces if bundle is not None else None))
13701397

13711398
# Initializing record-specific attributes
13721399
super(ProvBundle, self).__init__(bundle, identifier, attributes, other_attributes, asserted)
@@ -1379,9 +1406,6 @@ def get_default_namespace(self):
13791406
return self._namespaces.get_default_namespace()
13801407

13811408
def add_namespace(self, namespace_or_prefix, uri=None):
1382-
if self._bundle is not None: # This is a bundle
1383-
logger.warn("Namespace cannot be added into a bundle. It will be added to the document instead.")
1384-
13851409
if uri is None:
13861410
self._namespaces.add_namespace(namespace_or_prefix)
13871411
else:
@@ -1398,10 +1422,12 @@ def get_anon_id(self, record):
13981422
return self._namespaces.get_anonymous_identifier()
13991423

14001424
def get_records(self, class_or_type_or_tuple=None):
1401-
if class_or_type_or_tuple is None:
1402-
return self._records
1425+
# Only returning asserted records
1426+
results = [rec for rec in self._records if rec.is_asserted()]
1427+
if class_or_type_or_tuple:
1428+
return filter(lambda rec: isinstance(rec, class_or_type_or_tuple), results)
14031429
else:
1404-
return filter(lambda rec: isinstance(rec, class_or_type_or_tuple), self._records)
1430+
return results
14051431

14061432
def get_record(self, identifier):
14071433
if identifier is None:
@@ -1453,22 +1479,21 @@ def _encode_json_representation(self, value):
14531479
return value
14541480

14551481
def _decode_json_representation(self, literal):
1456-
try:
1482+
if isinstance(literal, dict):
1483+
# complex type
14571484
value = literal['$']
1458-
if 'lang' in literal:
1459-
return Literal(value, langtag=literal['lang'])
1485+
datatype = literal['type'] if 'type' in literal else None
1486+
langtag = literal['lang'] if 'lang' in literal else None
1487+
if datatype == u'xsd:anyURI':
1488+
return Identifier(value)
1489+
elif datatype == u'xsd:QName':
1490+
return self.valid_identifier(value)
14601491
else:
1461-
datatype = literal['type']
1462-
if datatype == u'xsd:anyURI':
1463-
return Identifier(value)
1464-
elif datatype == u'xsd:QName':
1465-
return self.valid_identifier(value)
1466-
else:
1467-
# The literal of standard Python types is not converted here
1468-
# It will be automatically converted when added to a record by _auto_literal_conversion()
1469-
return Literal(value, self.valid_identifier(datatype))
1470-
except:
1471-
# simple type, just return it
1492+
# The literal of standard Python types is not converted here
1493+
# It will be automatically converted when added to a record by _auto_literal_conversion()
1494+
return Literal(value, self.valid_identifier(datatype), langtag)
1495+
else:
1496+
# simple type, just return it
14721497
return literal
14731498

14741499
def _encode_JSON_container(self):
@@ -1701,12 +1726,10 @@ def get_flattened(self):
17011726
return document
17021727

17031728
def __eq__(self, other):
1704-
try:
1705-
other_records = set(other._records)
1706-
except:
1707-
# other is not a bundle
1729+
if not isinstance(other, ProvBundle):
17081730
return False
1709-
this_records = set(self._records)
1731+
other_records = set(other.get_records())
1732+
this_records = set(self.get_records())
17101733
if len(this_records) != len(other_records):
17111734
return False
17121735
# check if all records for equality
@@ -1721,12 +1744,12 @@ def __eq__(self, other):
17211744
other_records.remove(record_b)
17221745
continue
17231746
else:
1724-
logger.debug("Unequal PROV records:")
1725-
logger.debug("%s" % unicode(record_a))
1726-
logger.debug("%s" % unicode(record_b))
1747+
logger.debug("Equality (ProvBundle): Unequal PROV records:")
1748+
logger.debug("%s", unicode(record_a))
1749+
logger.debug("%s", unicode(record_b))
17271750
return False
17281751
else:
1729-
logger.debug("Could not find a record with this identifier: %s" % unicode(record_a._identifier))
1752+
logger.debug("Equality (ProvBundle): Could not find a record with this identifier: %s", unicode(record_a._identifier))
17301753
return False
17311754
else:
17321755
# Manually look for the record
@@ -1737,7 +1760,7 @@ def __eq__(self, other):
17371760
found = True
17381761
break
17391762
if not found:
1740-
logger.debug("Could not find this record: %s" % unicode(record_a))
1763+
logger.debug("Equality (ProvBundle): Could not find this record: %s", unicode(record_a))
17411764
return False
17421765
return True
17431766

0 commit comments

Comments
 (0)