Merge pull request #166 from OpenDataServices/cove-777-multilingual

Bjwebb · web-flow · commit dfdf61d66f63 · 2017-08-07T11:24:01.000+01:00
IATI multilingual support
diff --git a/flattentool/input.py b/flattentool/input.py
@@ -549,8 +549,9 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
                 if current_type and current_type != 'array':
                     raise ValueError("There is an array at '{}' when the schema says there should be a '{}'".format(path_till_now, current_type))
                 list_index = int(next_path_item)
+                current_type = 'array'
 
-            if isint(next_path_item) or current_type == 'array':
+            if current_type == 'array':
                 list_as_dict = current_path.get(path_item)
                 if list_as_dict is None:
                     list_as_dict = ListAsDict()
@@ -564,7 +565,10 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
                     new_path = OrderedDict()
                     list_as_dict[list_index] = new_path
                 current_path = new_path
-                continue
+                if not xml or num < len(path_list)-2:
+                    # In xml "arrays" can have text values, if they're the final element
+                    # This corresponds to a tag with text, but also possibly attributes
+                    continue
 
             ## Object
             if current_type == 'object' or (not current_type and next_path_item):
@@ -578,7 +582,7 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
                     break
                 current_path = new_path
                 continue
-            if current_type and current_type != 'object' and next_path_item:
+            if current_type and current_type not in ['object', 'array'] and next_path_item:
                 raise ValueError("There is an object or list at '{}' but it should be an {}".format(path_till_now, current_type))
 
             ## Other Types
@@ -607,7 +611,9 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name):
                     if path_item.startswith('@'):
                         current_path[path_item] = cell
                     else:
-                        if path_item not in current_path:
+                        if current_type == 'array':
+                            current_path['text()'] = cell
+                        elif path_item not in current_path:
                             current_path[path_item] = {'text()': cell}
                         else:
                             current_path[path_item]['text()'] = cell
diff --git a/flattentool/xml_output.py b/flattentool/xml_output.py
@@ -1,43 +1,62 @@
 try:
     import lxml.etree as ET
+    # If we're using lxml we have to do some extra work to support namespaces,
+    # so we have a variable to check whether we're using lxml:
+    USING_LXML = True
 except ImportError:
     import xml.etree.ElementTree as ET
+    USING_LXML = False
 from warnings import warn
 from flattentool.exceptions import DataErrorWarning
 
 
-def child_to_xml(parent_el, tagname, child):
+def child_to_xml(parent_el, tagname, child, toplevel=False, nsmap=None):
     if hasattr(child, 'items'):
-        child_el = dict_to_xml(child, tagname)
+        child_el = dict_to_xml(child, tagname, toplevel=False, nsmap=nsmap)
         if child_el is not None:
             parent_el.append(child_el)
     else:
         if tagname.startswith('@'):
+            if USING_LXML and toplevel and tagname.startswith('@xmlns'):
+                nsmap[tagname[1:].split(':', 1)[1]] = str(child)
+                return
             try:
-                parent_el.attrib[tagname[1:]] = str(child)
+                attr_name = tagname[1:]
+                if USING_LXML and ':' in attr_name:
+                    attr_name = '{' + nsmap.get(attr_name.split(':', 1)[0], '') + '}' + attr_name.split(':', 1)[1]
+                parent_el.attrib[attr_name] = str(child)
             except ValueError as e:
                 warn(str(e), DataErrorWarning)
         elif tagname == 'text()':
             parent_el.text = str(child)
         else:
-            raise('Everything should end with text() or an attirbute!')
+            raise('Everything should end with text() or an attribute!')
 
 
-def dict_to_xml(data, tagname):
+def dict_to_xml(data, tagname, toplevel=True, nsmap=None):
+    if USING_LXML and ':' in tagname and not toplevel:
+        tagname = '{' + nsmap.get(tagname.split(':', 1)[0], '') + '}' + tagname.split(':', 1)[1]
     try:
-        el = ET.Element(tagname)
+        if USING_LXML:
+            el = ET.Element(tagname, nsmap=nsmap)
+        else:
+            el = ET.Element(tagname)
     except ValueError as e:
         warn(str(e), DataErrorWarning)
         return
 
     for k, v in data.items():
         if type(v) == list:
             for item in v:
-                child_to_xml(el, k, item)
+                child_to_xml(el, k, item, nsmap=nsmap)
         else:
-            child_to_xml(el, k, v)
+            child_to_xml(el, k, v, toplevel=toplevel, nsmap=nsmap)
     return el
 
 
 def toxml(data):
-    return ET.tostring(dict_to_xml(data, 'iati-activities'))
+    nsmap = {
+        # This is "bound by definition" - see https://www.w3.org/XML/1998/namespace
+        'xml': 'http://www.w3.org/XML/1998/namespace'
+    }
+    return ET.tostring(dict_to_xml(data, 'iati-activities', nsmap=nsmap))