25
25
except ImportError :
26
26
pass
27
27
28
- from copy import deepcopy
28
+ from copy import deepcopy , copy
29
29
30
30
try :
31
31
from collections import OrderedDict
173
173
PROV_ID_ATTRIBUTES_MAP = dict ((prov_id , attribute ) for (prov_id , attribute ) in PROV_RECORD_ATTRIBUTES )
174
174
PROV_ATTRIBUTES_ID_MAP = dict ((attribute , prov_id ) for (prov_id , attribute ) in PROV_RECORD_ATTRIBUTES )
175
175
176
+
177
+ # Converting an attribute to the normal form for comparison purposes
178
+ _normalise_attributes = lambda attr : (unicode (attr [0 ]), unicode (attr [1 ]))
179
+
180
+
181
+ # Datatypes
176
182
_r_xsd_dateTime = re .compile (""" ^
177
183
(?P<year>-?[0-9]{4}) - (?P<month>[0-9]{2}) - (?P<day>[0-9]{2})
178
184
T (?P<hour>[0-9]{2}) : (?P<minute>[0-9]{2}) : (?P<second>[0-9]{2})
181
187
Z | (?P<tz_hr>[-+][0-9]{2}) : (?P<tz_min>[0-9]{2})
182
188
)?
183
189
$ """ , re .X )
184
- _r_typed_literal_uri = re .compile (r'^"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\^\^<(?P<datatype>[^>\\]*(?:\\.[^>\\]*)*)>$' , re .X )
185
- _r_typed_literal_qname = re .compile (r'^"(?P<value>[^"\\]*(?:\\.[^"\\]*)*)"\^\^(?P<datatype>[^>\\]*(?:\\.[^>\\]*)*)$' , re .X )
186
190
187
191
attr2rdf = lambda attr : PROV [PROV_ID_ATTRIBUTES_MAP [attr ].split ('prov:' )[1 ]].rdf_representation ()
188
192
189
- # Converting an attribute to the normal form for comparison purposes
190
- _normalise_attributes = lambda attr : (unicode (attr [0 ]), unicode (attr [1 ]))
191
-
192
- # Datatypes
193
193
def _parse_xsd_dateTime (s ):
194
194
"""Returns datetime or None."""
195
195
m = _r_xsd_dateTime .match (s )
196
196
if m is not None :
197
197
values = m .groupdict ()
198
- if values ["microsecond" ] is None :
199
- values ["microsecond" ] = 0
198
+ if values ["microsecond" ] is None :
199
+ values ["microsecond" ] = 0
200
+ else :
201
+ values ["microsecond" ] = values ["microsecond" ][1 :]
202
+ values ["microsecond" ] += "0" * (6 - len (values ["microsecond" ]))
203
+ values = dict ((k , int (v )) for k , v in values .iteritems () if not k .startswith ("tz" ))
204
+ return datetime .datetime (** values )
200
205
else :
201
- values ["microsecond" ] = values ["microsecond" ][1 :]
202
- values ["microsecond" ] += "0" * (6 - len (values ["microsecond" ]))
203
- values = dict ((k , int (v )) for k , v in values .iteritems ()
204
- if not k .startswith ("tz" ))
205
-
206
- return datetime .datetime (** values )
206
+ return None
207
207
208
208
209
209
def _ensure_datetime (time ):
@@ -251,9 +251,16 @@ def parse_xsd_types(value, datatype):
251
251
return XSD_DATATYPE_PARSERS [datatype ](value ) if datatype in XSD_DATATYPE_PARSERS else None
252
252
253
253
254
+ def _ensure_multiline_string_triple_quoted (s ):
255
+ format_str = '%s'
256
+ if isinstance (s , basestring ):
257
+ format_str = u'"""%s"""' if '\n ' in s else u'"%s"'
258
+ return format_str % s
259
+
260
+
254
261
def encoding_PROV_N_value (value ):
255
262
if isinstance (value , basestring ):
256
- return '"%s"' % value
263
+ return _ensure_multiline_string_triple_quoted ( value )
257
264
elif isinstance (value , datetime .datetime ):
258
265
return value .isoformat ()
259
266
elif isinstance (value , float ):
@@ -304,9 +311,9 @@ def has_no_langtag(self):
304
311
def provn_representation (self ):
305
312
if self ._langtag :
306
313
# a langtag can only goes with string
307
- return u'"%s" @%s' % (unicode (self ._value ), unicode (self ._langtag ))
314
+ return u'%s @%s' % (_ensure_multiline_string_triple_quoted (self ._value ), unicode (self ._langtag ))
308
315
else :
309
- return u'"%s" %%%% %s' % (unicode (self ._value ), unicode (self ._datatype ))
316
+ return u'%s %%%% %s' % (_ensure_multiline_string_triple_quoted (self ._value ), unicode (self ._datatype ))
310
317
311
318
def json_representation (self ):
312
319
if self ._langtag :
@@ -494,7 +501,7 @@ def add_asserted_type(self, type_identifier):
494
501
if type_identifier not in asserted_types :
495
502
if self ._extra_attributes is None :
496
503
self ._extra_attributes = set ()
497
- self ._extra_attributes .update ( set ([( PROV ['type' ], type_identifier )] ))
504
+ self ._extra_attributes .add (( PROV ['type' ], type_identifier ))
498
505
499
506
def get_attribute (self , attr_name ):
500
507
if not self ._extra_attributes :
@@ -542,19 +549,18 @@ def _auto_literal_conversion(self, literal):
542
549
543
550
def parse_extra_attributes (self , extra_attributes ):
544
551
if isinstance (extra_attributes , dict ):
545
- # This will only work if extra_attributes is a dictionary
546
552
# Converting the dictionary into a list of tuples (i.e. attribute-value pairs)
547
553
extra_attributes = extra_attributes .items ()
548
- attr_list = set ((self ._bundle .valid_identifier (attribute ), self ._auto_literal_conversion (value )) for attribute , value in extra_attributes )
549
- return attr_list
554
+ attr_set = set ((self ._bundle .valid_identifier (attribute ), self ._auto_literal_conversion (value )) for attribute , value in extra_attributes )
555
+ return attr_set
550
556
551
557
def add_extra_attributes (self , extra_attributes ):
552
558
if extra_attributes :
553
559
if self ._extra_attributes is None :
554
560
self ._extra_attributes = set ()
555
- attr_list = self .parse_extra_attributes (extra_attributes )
556
561
# Check attributes for valid qualified names
557
- self ._extra_attributes .update (attr_list )
562
+ attr_set = self .parse_extra_attributes (extra_attributes )
563
+ self ._extra_attributes .update (attr_set )
558
564
559
565
def add_attributes (self , attributes , extra_attributes ):
560
566
if attributes :
@@ -1219,11 +1225,12 @@ def add_attributes(self, attributes, extra_attributes):
1219
1225
1220
1226
# Bundle
1221
1227
class NamespaceManager (dict ):
1222
- def __init__ (self , default_namespaces = {PROV .get_prefix (): PROV , XSD .get_prefix (): XSD }, default = None , parent = None ):
1228
+ def __init__ (self , namespaces = {}, default_namespaces = {PROV .get_prefix (): PROV , XSD .get_prefix (): XSD }, default = None , parent = None ):
1223
1229
self ._default_namespaces = {}
1224
1230
self ._default_namespaces .update (default_namespaces )
1225
- self ._namespaces = {}
1226
1231
self .update (self ._default_namespaces )
1232
+ self ._namespaces = {}
1233
+
1227
1234
if default is not None :
1228
1235
self .set_default_namespace (default )
1229
1236
else :
@@ -1232,6 +1239,7 @@ def __init__(self, default_namespaces={PROV.get_prefix(): PROV, XSD.get_prefix()
1232
1239
# TODO check if default is in the default namespaces
1233
1240
self ._anon_id_count = 0
1234
1241
self ._rename_map = {}
1242
+ self .add_namespaces (namespaces )
1235
1243
1236
1244
def get_namespace (self , uri ):
1237
1245
for namespace in self .values ():
@@ -1267,18 +1275,35 @@ def add_namespace(self, namespace):
1267
1275
namespace = new_namespace
1268
1276
self ._namespaces [prefix ] = namespace
1269
1277
self [prefix ] = namespace
1278
+ return namespace
1279
+
1280
+ def add_namespaces (self , namespaces ):
1281
+ if namespaces :
1282
+ for prefix , uri in namespaces .items ():
1283
+ ns = Namespace (prefix , uri )
1284
+ self .add_namespace (ns )
1270
1285
1271
1286
def get_valid_identifier (self , identifier ):
1272
1287
if not identifier :
1273
1288
return None
1274
1289
if isinstance (identifier , Identifier ):
1275
1290
if isinstance (identifier , QName ):
1276
1291
# Register the namespace if it has not been registered before
1277
- namespace = identifier .get_namespace ()
1278
- if namespace not in self .values ():
1279
- self .add_namespace (namespace )
1280
- # return the original identifier
1281
- return identifier
1292
+ namespace = identifier ._namespace
1293
+ prefix = namespace .get_prefix ()
1294
+ if prefix in self and self [prefix ] == namespace :
1295
+ # No need to add the namespace
1296
+ existing_ns = self [prefix ]
1297
+ if existing_ns is namespace :
1298
+ return identifier
1299
+ else :
1300
+ return existing_ns [identifier ._localpart ] # reuse the existing namespace
1301
+ else :
1302
+ ns = self .add_namespace (deepcopy (namespace )) # Do not reuse the namespace object
1303
+ return ns [identifier ._localpart ]
1304
+ else :
1305
+ # return the original identifier
1306
+ return identifier
1282
1307
elif isinstance (identifier , (str , unicode )):
1283
1308
if identifier .startswith ('_:' ):
1284
1309
return None
@@ -1325,15 +1350,16 @@ def _get_unused_prefix(self, original_prefix):
1325
1350
1326
1351
1327
1352
class ProvBundle (ProvEntity ):
1328
- def __init__ (self , bundle = None , identifier = None , attributes = None , other_attributes = None , asserted = True ):
1353
+ def __init__ (self , bundle = None , identifier = None , attributes = None , other_attributes = None , asserted = True , namespaces = {} ):
1329
1354
# Initializing bundle-specific attributes
1330
1355
self ._records = list ()
1331
1356
self ._id_map = dict ()
1332
1357
self ._bundles = dict ()
1333
1358
if bundle is None :
1334
- self ._namespaces = NamespaceManager ()
1359
+ self ._namespaces = NamespaceManager (namespaces )
1335
1360
else :
1336
1361
self ._namespaces = bundle ._namespaces
1362
+ self ._namespaces .add_namespaces (namespaces )
1337
1363
1338
1364
# Initializing record-specific attributes
1339
1365
super (ProvBundle , self ).__init__ (bundle , identifier , attributes , other_attributes , asserted )
@@ -1631,14 +1657,28 @@ def from_provjson(json_content, **kw):
1631
1657
return json .loads (json_content , cls = ProvBundle .JSONDecoder , ** kw )
1632
1658
1633
1659
def get_flattened (self ):
1634
- flattened = deepcopy (self )
1635
- for bundle in flattened ._bundles .values ():
1660
+ namespaces = dict ((ns .get_prefix (), ns .get_uri ()) for ns in self .get_registered_namespaces ())
1661
+ document = ProvBundle (namespaces = namespaces )
1662
+ default_ns_uri = self .get_default_namespace ()
1663
+ if default_ns_uri is not None :
1664
+ document .set_default_namespace (default_ns_uri )
1665
+ # Enumerate records and bundles
1666
+ bundles = []
1667
+ records = []
1668
+ for record in self .get_records ():
1669
+ if isinstance (record , ProvBundle ):
1670
+ bundles .append (record )
1671
+ else :
1672
+ records .append (record )
1673
+ records = deepcopy (records )
1674
+ for record in records :
1675
+ document ._add_record (record )
1676
+ for bundle in bundles :
1636
1677
for record in bundle ._records :
1637
- flattened .add_record (record .get_type (), record ._identifier , record ._attributes , record ._extra_attributes , record ._asserted )
1638
- flattened ._records .remove (bundle )
1639
-
1640
- flattened ._bundles = {}
1641
- return flattened
1678
+ document .add_record (record .get_type (), copy (record ._identifier ),
1679
+ deepcopy (record ._attributes ), deepcopy (record ._extra_attributes ),
1680
+ record ._asserted )
1681
+ return document
1642
1682
1643
1683
def __eq__ (self , other ):
1644
1684
try :
0 commit comments