Skip to content

Commit 8da968e

Browse files
committed
Add generic odML parser for XML, YAML, JSON
1 parent e4cfdf0 commit 8da968e

File tree

2 files changed

+385
-0
lines changed

2 files changed

+385
-0
lines changed

odml/tools/odmlparser.py

Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
#!/usr/bin/env python
2+
"""
3+
4+
A generic odML parsing module.
5+
6+
Parses odML files and documents.
7+
8+
"""
9+
10+
import yaml
11+
import json
12+
from odml import format
13+
from . import xmlparser
14+
15+
# FIX ME: Version should not be hardcoded here. Import from odML module after
16+
# fixing the circular imports issue.
17+
# Default odML version: written into YAML/JSON output by ODMLWriter and used
# by ODMLReader as the fallback when a parsed file has no 'odml-version' key.
odml_version = '1'
18+
19+
# Parser names accepted by ODMLWriter and ODMLReader; the requested name is
# upper-cased before being checked against this list.
allowed_parsers = ['ODML', 'XML', 'YAML', 'JSON']
20+
21+
22+
class ODMLWriter:
    """
    A generic odML document writer, for XML, YAML and JSON.

    Usage:
        xml_writer = ODMLWriter(parser='XML')
        xml_writer.write_file(odml_document, filepath)
    """

    def __init__(self, parser='XML'):
        """
        Select the output format.

        :param parser: name of the output format; upper-cased and checked
                       against ``allowed_parsers``.
        :raises NotImplementedError: if the name is not an allowed parser.
        """
        self.doc = None  # odML document
        self.parsed_doc = None  # Python dictionary object equivalent
        parser = parser.upper()
        if parser not in allowed_parsers:
            raise NotImplementedError("'%s' odML parser does not exist!" % parser)
        self.parser = parser

    def to_dict(self, odml_document):
        """
        Convert an odML document into a plain dictionary.

        The result is stored in ``self.parsed_doc``; nothing is returned.
        Attributes with a falsy value are left out, except 'sections', which
        is always included when the document has that attribute.
        """
        parsed_doc = {}

        for name in format.Document._args:
            # Translate the format-level name to the object attribute name.
            attr = name
            if name in format.Document._map:
                attr = format.Document._map[name]
            if not hasattr(odml_document, attr):
                continue

            if attr == 'sections':
                parsed_doc[attr] = self.get_sections(odml_document.sections)
            else:
                content = getattr(odml_document, attr)
                if content:
                    parsed_doc[attr] = content

        self.parsed_doc = parsed_doc

    def get_sections(self, section_list):
        """
        Convert a list of sections into a list of dictionaries.

        Sub-sections are converted recursively and properties are converted
        via ``get_properties``. Other attributes are only included when their
        value is truthy.
        """
        section_seq = []

        for section in section_list:
            section_dict = {}
            for name in format.Section._args:
                attr = name
                if name in format.Section._map:
                    attr = format.Section._map[name]
                if not hasattr(section, attr):
                    continue

                if attr == 'properties':
                    section_dict[attr] = self.get_properties(section.properties)
                elif attr == 'sections':
                    section_dict[attr] = self.get_sections(section.sections)
                else:
                    content = getattr(section, attr)
                    if content:
                        section_dict[attr] = content

            section_seq.append(section_dict)

        return section_seq

    def get_properties(self, props_list):
        """
        Convert a list of properties into a list of dictionaries.

        Values are converted via ``get_values``. Other attributes are included
        when truthy, or when they are an empty list.
        """
        props_seq = []

        for prop in props_list:
            prop_dict = {}
            for name in format.Property._args:
                attr = name
                if name in format.Property._map:
                    attr = format.Property._map[name]
                if not hasattr(prop, attr):
                    continue

                if attr == 'values':
                    prop_dict[attr] = self.get_values(prop.values)
                else:
                    content = getattr(prop, attr)
                    if (content == []) or content:  # Even if 'value' is empty, allow '[]'
                        prop_dict[attr] = content

            props_seq.append(prop_dict)

        return props_seq

    def get_values(self, value_list):
        """
        Convert a list of values into a list of dictionaries.

        Only attributes with a truthy value are included.
        """
        value_seq = []

        for value in value_list:
            val_dict = {}
            for name in format.Value._args:
                attr = name
                if name in format.Value._map:
                    attr = format.Value._map[name]
                if hasattr(value, attr):
                    content = getattr(value, attr)
                    if content:
                        val_dict[attr] = content

            value_seq.append(val_dict)

        return value_seq

    def write_file(self, odml_document, filename):
        """
        Write an odML document to ``filename`` in the configured format.

        XML/ODML output is delegated to ``xmlparser.XMLWriter``. For YAML and
        JSON the document is first converted with ``to_dict`` and wrapped in a
        dictionary with the keys 'Document' and 'odml-version'.
        """
        if self.parser == 'XML' or self.parser == 'ODML':
            xmlparser.XMLWriter(odml_document).write_file(filename)
            return

        self.to_dict(odml_document)
        odml_output = {}
        odml_output['Document'] = self.parsed_doc
        odml_output['odml-version'] = odml_version

        # The context manager guarantees the handle is closed even when
        # serialisation raises.
        with open(filename, 'w') as f:
            if self.parser == 'YAML':
                f.write(yaml.dump(odml_output, default_flow_style=False))
            elif self.parser == 'JSON':
                f.write(json.dumps(odml_output, indent=4))
140+
141+
142+
class ODMLReader:
    """
    A reader to parse odML files or strings into odml documents,
    based on the given data exchange format, like XML, YAML or JSON.

    Usage:
        yaml_odml_doc = ODMLReader(parser='YAML').fromFile(open("odml_doc.yaml"))
        json_odml_doc = ODMLReader(parser='JSON').fromFile(open("odml_doc.json"))
    """

    def __init__(self, parser='XML'):
        """
        Select the input format.

        :param parser: name of the input format; upper-cased and checked
                       against ``allowed_parsers``.
        :raises NotImplementedError: if the name is not an allowed parser.
        """
        self.odml_version = None  # odML version of input file
        self.doc = None  # odML document
        self.parsed_doc = None  # Python dictionary object equivalent
        parser = parser.upper()
        if parser not in allowed_parsers:
            raise NotImplementedError("'%s' odML parser does not exist!" % parser)
        self.parser = parser

    def is_valid_attribute(self, attr, fmt):
        """
        Return ``attr`` if the format ``fmt`` knows it, otherwise None.

        An attribute is accepted when it is listed in ``fmt._args`` or when
        ``fmt.revmap(attr)`` is truthy. Unknown attributes are reported on
        stdout.
        """
        if attr in fmt._args:
            return attr
        if fmt.revmap(attr):
            return attr
        print("Invalid element <%s> inside <%s> tag" % (attr, fmt.__class__.__name__))
        return None

    def to_odml(self):
        """
        Build an odML document from the dictionary in ``self.parsed_doc``.

        Reads 'odml-version' (falling back to the module default) and then
        replaces ``self.parsed_doc`` with its 'Document' entry. The resulting
        document is stored in ``self.doc`` and returned.
        """
        self.odml_version = self.parsed_doc.get('odml-version', odml_version)
        self.parsed_doc = self.parsed_doc['Document']

        doc_attrs = {}
        doc_secs = []

        for key in self.parsed_doc:
            attr = self.is_valid_attribute(key, format.Document)
            if attr == 'sections':
                doc_secs = self.parse_sections(self.parsed_doc['sections'])
            elif attr:
                doc_attrs[key] = self.parsed_doc[key]

        doc = format.Document.create(**doc_attrs)
        for sec in doc_secs:
            doc.append(sec)
        self.doc = doc
        return self.doc

    def parse_sections(self, section_list):
        """
        Convert a list of section dictionaries into section objects.

        Nested 'sections' are parsed recursively and 'properties' are parsed
        via ``parse_properties``; both are appended to the created section.
        """
        odml_sections = []

        for section in section_list:
            sec_attrs = {}
            children_secs = []
            sec_props = []
            for key in section:
                attr = self.is_valid_attribute(key, format.Section)
                if attr == 'properties':
                    sec_props = self.parse_properties(section['properties'])
                elif attr == 'sections':
                    children_secs = self.parse_sections(section['sections'])
                elif attr:
                    sec_attrs[attr] = section[attr]

            sec = format.Section.create(**sec_attrs)
            for prop in sec_props:
                sec.append(prop)
            for child_sec in children_secs:
                sec.append(child_sec)
            odml_sections.append(sec)

        return odml_sections

    def parse_properties(self, props_list):
        """
        Convert a list of property dictionaries into property objects.

        The 'values' entry is parsed via ``parse_values`` and handed to
        ``format.Property.create`` as ``value``.
        """
        odml_props = []

        for _property in props_list:
            prop_attrs = {}
            # Reset for every property: otherwise a property without a
            # 'values' key would raise NameError or silently inherit the
            # values of the previous property.
            # NOTE(review): whether Property.create accepts an empty value
            # list is not visible here - confirm.
            children_values = []

            for key in _property:
                attr = self.is_valid_attribute(key, format.Property)
                if attr == 'values':
                    children_values = self.parse_values(_property['values'])
                elif attr:
                    prop_attrs[attr] = _property[attr]

            prop = format.Property.create(value=children_values, **prop_attrs)
            odml_props.append(prop)

        return odml_props

    def parse_values(self, value_list):
        """
        Convert a list of value dictionaries into value objects.

        Every recognised attribute is converted to ``str`` before being
        passed to ``format.Value.create``.
        """
        odml_values = []

        for value in value_list:
            value_attrs = {}

            for key in value:
                attr = self.is_valid_attribute(key, format.Value)
                if attr:
                    value_attrs[attr] = str(value[attr])

            val = format.Value.create(**value_attrs)
            odml_values.append(val)

        return odml_values

    def fromFile(self, file):
        """
        Parse an odML document from ``file``.

        For XML/ODML, ``file`` is passed straight to ``xmlparser.XMLReader``.
        For YAML/JSON it must be an open file object and is closed after
        reading. Returns the document, or None when a YAML/JSON parse error
        was caught (the error is printed).
        """
        if self.parser == 'XML' or self.parser == 'ODML':
            odml_doc = xmlparser.XMLReader().fromFile(file)
            self.doc = odml_doc
            return odml_doc

        elif self.parser == 'YAML':
            try:
                # NOTE(review): yaml.load without a safe Loader can construct
                # arbitrary Python objects; consider yaml.safe_load if the
                # input may be untrusted.
                self.parsed_doc = yaml.load(file)
            except yaml.parser.ParserError as e:
                print(e)
                return None
            finally:
                file.close()
            return self.to_odml()

        elif self.parser == 'JSON':
            try:
                self.parsed_doc = json.load(file)
            except json.decoder.JSONDecodeError as e:
                print(e)
                return None
            finally:
                file.close()
            return self.to_odml()

    def fromString(self, string):
        """
        Parse an odML document from ``string``.

        Returns the document, or None when a YAML/JSON parse error was caught
        (the error is printed).
        """
        if self.parser == 'XML' or self.parser == 'ODML':
            odml_doc = xmlparser.XMLReader().fromString(string)
            self.doc = odml_doc
            return self.doc

        elif self.parser == 'YAML':
            try:
                # Must be stored on self: to_odml() reads self.parsed_doc.
                # NOTE(review): yaml.load without a safe Loader can construct
                # arbitrary Python objects; consider yaml.safe_load if the
                # input may be untrusted.
                self.parsed_doc = yaml.load(string)
            except yaml.parser.ParserError as e:
                print(e)
                return None
            return self.to_odml()

        elif self.parser == 'JSON':
            try:
                # Must be stored on self: to_odml() reads self.parsed_doc.
                self.parsed_doc = json.loads(string)
            except json.decoder.JSONDecodeError as e:
                print(e)
                return None
            return self.to_odml()

test/test_parser.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import unittest
2+
import os
3+
from odml.tools import odmlparser
4+
5+
6+
class TestParser(unittest.TestCase):
    """Round-trip tests for the generic odML reader and writer (XML, YAML, JSON)."""

    def setUp(self):
        self.basepath = 'doc/example_odMLs/'
        self.basefile = 'doc/example_odMLs/THGTTG.odml'

        # Temporary output files; removed again in tearDown.
        self.xml_file = 'doc/example_odMLs/THGTTG_xml.odml'
        self.yaml_file = 'doc/example_odMLs/THGTTG_yaml.odml'
        self.json_file = 'doc/example_odMLs/THGTTG_json.odml'

        self.xml_reader = odmlparser.ODMLReader(parser='XML')
        self.yaml_reader = odmlparser.ODMLReader(parser='YAML')
        self.json_reader = odmlparser.ODMLReader(parser='JSON')

        self.xml_writer = odmlparser.ODMLWriter(parser='XML')
        self.yaml_writer = odmlparser.ODMLWriter(parser='YAML')
        self.json_writer = odmlparser.ODMLWriter(parser='JSON')

        # Reference document that every round trip is compared against.
        self.odml_doc = self.xml_reader.fromFile(self.basefile)

    def tearDown(self):
        for path in (self.xml_file, self.yaml_file, self.json_file):
            if os.path.exists(path):
                os.remove(path)

    def _read(self, reader, path):
        """Parse ``path`` with ``reader``, making sure the handle is closed."""
        # Closing explicitly avoids leaked handles, which could otherwise
        # prevent tearDown from removing the file on some platforms.
        with open(path) as handle:
            return reader.fromFile(handle)

    def test_xml(self):
        self.xml_writer.write_file(self.odml_doc, self.xml_file)
        xml_doc = self._read(self.xml_reader, self.xml_file)

        self.assertEqual(xml_doc, self.odml_doc)

    def test_yaml(self):
        self.yaml_writer.write_file(self.odml_doc, self.yaml_file)
        yaml_doc = self._read(self.yaml_reader, self.yaml_file)

        self.assertEqual(yaml_doc, self.odml_doc)

    def test_json(self):
        self.json_writer.write_file(self.odml_doc, self.json_file)
        json_doc = self._read(self.json_reader, self.json_file)

        self.assertEqual(json_doc, self.odml_doc)

    def test_json_yaml_xml(self):
        # Chain the formats: original -> JSON -> YAML -> XML.
        self.json_writer.write_file(self.odml_doc, self.json_file)
        json_doc = self._read(self.json_reader, self.json_file)

        self.yaml_writer.write_file(json_doc, self.yaml_file)
        yaml_doc = self._read(self.yaml_reader, self.yaml_file)

        self.xml_writer.write_file(yaml_doc, self.xml_file)
        xml_doc = self._read(self.xml_reader, self.xml_file)

        # Equality is asserted in both directions on purpose.
        self.assertEqual(json_doc, self.odml_doc)
        self.assertEqual(json_doc, yaml_doc)
        self.assertEqual(json_doc, xml_doc)

        self.assertEqual(yaml_doc, self.odml_doc)
        self.assertEqual(yaml_doc, xml_doc)
        self.assertEqual(yaml_doc, json_doc)

        self.assertEqual(xml_doc, self.odml_doc)
        self.assertEqual(xml_doc, json_doc)
        self.assertEqual(xml_doc, yaml_doc)

0 commit comments

Comments
 (0)