Skip to content

Commit dfdf61d

Browse files
authored
Merge pull request #166 from OpenDataServices/cove-777-multilingual
IATI multilingual support
2 parents 4eb82ec + c617d48 commit dfdf61d

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

flattentool/input.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,9 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
549549
if current_type and current_type != 'array':
550550
raise ValueError("There is an array at '{}' when the schema says there should be a '{}'".format(path_till_now, current_type))
551551
list_index = int(next_path_item)
552+
current_type = 'array'
552553

553-
if isint(next_path_item) or current_type == 'array':
554+
if current_type == 'array':
554555
list_as_dict = current_path.get(path_item)
555556
if list_as_dict is None:
556557
list_as_dict = ListAsDict()
@@ -564,7 +565,10 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
564565
new_path = OrderedDict()
565566
list_as_dict[list_index] = new_path
566567
current_path = new_path
567-
continue
568+
if not xml or num < len(path_list)-2:
569+
# In xml "arrays" can have text values, if they're the final element
570+
# This corresponds to a tag with text, but also possibly attributes
571+
continue
568572

569573
## Object
570574
if current_type == 'object' or (not current_type and next_path_item):
@@ -578,7 +582,7 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
578582
break
579583
current_path = new_path
580584
continue
581-
if current_type and current_type != 'object' and next_path_item:
585+
if current_type and current_type not in ['object', 'array'] and next_path_item:
582586
raise ValueError("There is an object or list at '{}' but it should be an {}".format(path_till_now, current_type))
583587

584588
## Other Types
@@ -607,7 +611,9 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
607611
if path_item.startswith('@'):
608612
current_path[path_item] = cell
609613
else:
610-
if path_item not in current_path:
614+
if current_type == 'array':
615+
current_path['text()'] = cell
616+
elif path_item not in current_path:
611617
current_path[path_item] = {'text()': cell}
612618
else:
613619
current_path[path_item]['text()'] = cell

flattentool/xml_output.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,62 @@
11
try:
22
import lxml.etree as ET
3+
# If we're using lxml we have to do some extra work to support namespaces,
4+
# so we have a variable to check whether we're using lxml:
5+
USING_LXML = True
36
except ImportError:
47
import xml.etree.ElementTree as ET
8+
USING_LXML = False
59
from warnings import warn
610
from flattentool.exceptions import DataErrorWarning
711

812

def child_to_xml(parent_el, tagname, child, toplevel=False, nsmap=None):
    """Attach a single unflattened value to ``parent_el``.

    ``child`` is handled according to its shape and to ``tagname``:

    * anything with an ``items`` attribute (a mapping) becomes a nested
      element built by ``dict_to_xml`` and is appended to ``parent_el``;
    * a ``tagname`` starting with ``@`` becomes an attribute of
      ``parent_el``;
    * the special ``tagname`` ``'text()'`` becomes the text of ``parent_el``.

    When lxml is in use, ``@xmlns:<prefix>`` keys on the top-level element
    are not written as attributes; they are recorded in ``nsmap`` instead, and
    prefixed attribute names are rewritten to ``{namespace-uri}local`` form
    using ``nsmap``.

    Args:
        parent_el: element that receives the child, attribute or text.
        tagname: key from the unflattened data (element name, ``@attribute``
            or ``'text()'``).
        child: mapping or scalar value stored under ``tagname``.
        toplevel: True when ``parent_el`` is the document's root element.
        nsmap: dict mapping namespace prefixes to URIs; mutated in place when
            a top-level ``@xmlns:<prefix>`` key is seen. Must be a dict
            whenever lxml is in use and prefixed names can occur.

    Raises:
        TypeError: if ``child`` is a scalar and ``tagname`` is neither an
            ``@attribute`` nor ``'text()'``.
    """
    if hasattr(child, 'items'):
        # Nested elements are never the document root, hence toplevel=False.
        child_el = dict_to_xml(child, tagname, toplevel=False, nsmap=nsmap)
        # dict_to_xml returns None when the element could not be created.
        if child_el is not None:
            parent_el.append(child_el)
        return

    if tagname == 'text()':
        parent_el.text = str(child)
        return

    if not tagname.startswith('@'):
        # The previous code did `raise('...')`, i.e. raised a plain string.
        # Python turns that into a TypeError with a generic "exceptions must
        # derive from BaseException" message, hiding the real problem.
        # Raising TypeError explicitly keeps the exception type callers
        # already observe while surfacing the intended message.
        raise TypeError('Everything should end with text() or an attribute!')

    if USING_LXML and toplevel and tagname.startswith('@xmlns'):
        # Namespace declarations are passed to lxml through nsmap rather than
        # being set as ordinary attributes.
        nsmap[tagname[1:].split(':', 1)[1]] = str(child)
        return

    try:
        attr_name = tagname[1:]
        if USING_LXML and ':' in attr_name:
            # Rewrite "prefix:local" to "{uri}local"; unknown prefixes map to
            # an empty URI.
            prefix, local_name = attr_name.split(':', 1)
            attr_name = '{' + nsmap.get(prefix, '') + '}' + local_name
        parent_el.attrib[attr_name] = str(child)
    except ValueError as e:
        # A ValueError here (presumably an invalid attribute name or value)
        # is reported as a data warning instead of aborting the conversion.
        warn(str(e), DataErrorWarning)
2434

2535

def dict_to_xml(data, tagname, toplevel=True, nsmap=None):
    """Build an element named ``tagname`` from the mapping ``data``.

    Each key/value pair of ``data`` is handed to ``child_to_xml``; list
    values are expanded so that every item is added under the same key.

    Args:
        data: mapping of keys (element names, ``@attributes``, ``'text()'``)
            to values.
        tagname: name of the element to create. With lxml, a prefixed name on
            a non-top-level element is rewritten to ``{namespace-uri}local``
            form using ``nsmap``.
        toplevel: True when the element being built is the document root.
        nsmap: dict mapping namespace prefixes to URIs; passed to lxml when
            creating the element and forwarded to ``child_to_xml``.

    Returns:
        The new element, or ``None`` if creating it raised ``ValueError``
        (in which case a ``DataErrorWarning`` is emitted).
    """
    needs_qualified_name = USING_LXML and ':' in tagname and not toplevel
    if needs_qualified_name:
        prefix, local_name = tagname.split(':', 1)
        tagname = '{' + nsmap.get(prefix, '') + '}' + local_name

    try:
        # Only lxml's Element is given the nsmap keyword here.
        el = ET.Element(tagname, nsmap=nsmap) if USING_LXML else ET.Element(tagname)
    except ValueError as e:
        warn(str(e), DataErrorWarning)
        return None

    for key, value in data.items():
        if type(value) == list:
            # List items are passed without toplevel, so it takes its default
            # (False) in child_to_xml.
            for entry in value:
                child_to_xml(el, key, entry, nsmap=nsmap)
        else:
            child_to_xml(el, key, value, toplevel=toplevel, nsmap=nsmap)
    return el
4055

4156

4257
def toxml(data):
    """Serialise ``data`` as an ``iati-activities`` XML document.

    Builds the root element with ``dict_to_xml`` and returns the result of
    ``ET.tostring`` on it.
    """
    # The "xml" prefix is "bound by definition" - see
    # https://www.w3.org/XML/1998/namespace
    namespaces = dict(xml='http://www.w3.org/XML/1998/namespace')
    root = dict_to_xml(data, 'iati-activities', nsmap=namespaces)
    return ET.tostring(root)

0 commit comments

Comments
 (0)