@@ -73,7 +73,7 @@ def save_element(self, e, node=None):
73
73
fmt = e .format ()
74
74
75
75
if not node :
76
- curr_node = URIRef (odmlns + str (e .id ))
76
+ curr_node = URIRef (odmlns + unicode (e .id ))
77
77
else :
78
78
curr_node = node
79
79
@@ -88,6 +88,11 @@ def save_element(self, e, node=None):
88
88
if isinstance (fmt , Document .__class__ ):
89
89
self .g .add ((self .hub_root , odmlns .hasDocument , curr_node ))
90
90
91
+ # If available, add the document's file name to the document node
92
+ # so we can identify where the data came from.
93
+ if hasattr (e , "_origin_file_name" ):
94
+ self .g .add ((curr_node , odmlns .hasFileName , Literal (e ._origin_file_name )))
95
+
91
96
for k in fmt .rdf_map_keys :
92
97
if k == 'id' :
93
98
continue
@@ -101,7 +106,7 @@ def save_element(self, e, node=None):
101
106
self .g .add ((curr_node , fmt .rdf_map (k ), terminology_node ))
102
107
else :
103
108
# add the terminology to the hub and link it with the doc
104
- node = URIRef (odmlns + str (uuid .uuid4 ()))
109
+ node = URIRef (odmlns + unicode (uuid .uuid4 ()))
105
110
self .g .add ((node , RDF .type , URIRef (terminology_url )))
106
111
self .g .add ((self .hub_root , odmlns .hasTerminology , node ))
107
112
self .g .add ((curr_node , fmt .rdf_map (k ), node ))
@@ -111,20 +116,20 @@ def save_element(self, e, node=None):
111
116
k == 'sections' and len (getattr (e , k )) > 0 :
112
117
sections = getattr (e , k )
113
118
for s in sections :
114
- node = URIRef (odmlns + str (s .id ))
119
+ node = URIRef (odmlns + unicode (s .id ))
115
120
self .g .add ((curr_node , fmt .rdf_map (k ), node ))
116
121
self .save_element (s , node )
117
122
elif isinstance (fmt , Section .__class__ ) and \
118
123
k == 'properties' and len (getattr (e , k )) > 0 :
119
124
properties = getattr (e , k )
120
125
for p in properties :
121
- node = URIRef (odmlns + str (p .id ))
126
+ node = URIRef (odmlns + unicode (p .id ))
122
127
self .g .add ((curr_node , fmt .rdf_map (k ), node ))
123
128
self .save_element (p , node )
124
129
elif isinstance (fmt , Property .__class__ ) and \
125
130
k == 'value' and len (getattr (e , k )) > 0 :
126
131
values = getattr (e , k )
127
- seq = URIRef (odmlns + str (uuid .uuid4 ()))
132
+ seq = URIRef (odmlns + unicode (uuid .uuid4 ()))
128
133
self .g .add ((seq , RDF .type , RDF .Seq ))
129
134
self .g .add ((curr_node , fmt .rdf_map (k ), seq ))
130
135
# rdflib so far does not respect RDF:li item order
@@ -133,15 +138,15 @@ def save_element(self, e, node=None):
133
138
# this should be reversed to RDF:li again!
134
139
# see https://github.com/RDFLib/rdflib/issues/280
135
140
# -- keep until supported
136
- # bag = URIRef(odmlns + str (uuid.uuid4()))
141
+ # bag = URIRef(odmlns + unicode (uuid.uuid4()))
137
142
# self.g.add((bag, RDF.type, RDF.Bag))
138
143
# self.g.add((curr_node, fmt.rdf_map(k), bag))
139
144
# for v in values:
140
145
# self.g.add((bag, RDF.li, Literal(v)))
141
146
142
147
counter = 1
143
148
for v in values :
144
- pred = "%s_%s" % (str (RDF ), counter )
149
+ pred = "%s_%s" % (unicode (RDF ), counter )
145
150
self .g .add ((seq , URIRef (pred ), Literal (v )))
146
151
counter = counter + 1
147
152
@@ -222,7 +227,11 @@ def to_odml(self):
222
227
223
228
def from_file (self , filename , doc_format ):
224
229
self .g = Graph ().parse (source = filename , format = doc_format )
225
- return self .to_odml ()
230
+ docs = self .to_odml ()
231
+ for d in docs :
232
+ # Provide original file name via the document
233
+ d ._origin_file_name = os .path .basename (filename )
234
+ return docs
226
235
227
236
def from_string (self , file , doc_format ):
228
237
self .g = Graph ().parse (source = StringIO (file ), format = doc_format )
@@ -242,7 +251,7 @@ def parse_document(self, doc_uri):
242
251
doc_attrs [attr [0 ]] = doc_uri .split ("#" , 1 )[1 ]
243
252
else :
244
253
if len (elems ) > 0 :
245
- doc_attrs [attr [0 ]] = str (elems [0 ].toPython ())
254
+ doc_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
246
255
247
256
return {'Document' : doc_attrs , 'odml-version' : FORMAT_VERSION }
248
257
@@ -264,7 +273,7 @@ def parse_section(self, sec_uri):
264
273
sec_attrs [attr [0 ]] = sec_uri .split ("#" , 1 )[1 ]
265
274
else :
266
275
if len (elems ) > 0 :
267
- sec_attrs [attr [0 ]] = str (elems [0 ].toPython ())
276
+ sec_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
268
277
self ._check_mandatory_attrs (sec_attrs )
269
278
return sec_attrs
270
279
@@ -293,7 +302,7 @@ def parse_property(self, prop_uri):
293
302
prop_attrs [attr [0 ]] = prop_uri .split ("#" , 1 )[1 ]
294
303
else :
295
304
if len (elems ) > 0 :
296
- prop_attrs [attr [0 ]] = str (elems [0 ].toPython ())
305
+ prop_attrs [attr [0 ]] = unicode (elems [0 ].toPython ())
297
306
self ._check_mandatory_attrs (prop_attrs )
298
307
return prop_attrs
299
308
0 commit comments