Merge pull request #54 from mqnifestkelvin/feature_xml_3

pgleeson · web-flow · commit 024ea24b617d · 2023-08-15T15:26:50.000+01:00
Feature xml 3
diff --git a/examples/neuroml2/TestNeuroML.xml b/examples/neuroml2/TestNeuroML.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?>
-<neuroml xmlns="http://www.neuroml.org/schema/neuroml2" id="TestNeuroML">
+<neuroml xmlns="http://www.neuroml.org/schema/neuroml2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="TestNeuroML" xsi:schemaLocation="http://www.neuroml.org/schema/neuroml2https://raw.github.com/NeuroML/NeuroML2/development/Schemas/NeuroML2/NeuroML_v2.3.xsd">
     <izhikevich2007Cell id="izh2007RS0" C="100pF" v0="-60mV" k="0.7nS_per_mV" vr="-60mV" vt="-40mV" vpeak="35mV" a="0.03per_ms" b="-2nS" c="-50.0mV" d="100pA"/>
     <pulseGenerator id="pulseGen_0" delay="100ms" duration="800ms" amplitude="0.07 nA"/>
     <network id="IzNet">
diff --git a/examples/neuroml2/neuroml2_spec.py b/examples/neuroml2/neuroml2_spec.py
@@ -109,6 +109,13 @@ class neuroml(Base):
     xmlns: str = field(
         validator=instance_of(str), default="http://www.neuroml.org/schema/neuroml2"
     )
+    xmlns_xsi: str = field(
+        validator=instance_of(str), default="http://www.w3.org/2001/XMLSchema-instance"
+    )
+    xmlns_loc: str = field(
+        validator=instance_of(str),
+        default="http://www.neuroml.org/schema/neuroml2https://raw.github.com/NeuroML/NeuroML2/development/Schemas/NeuroML2/NeuroML_v2.3.xsd",
+    )
 
     izhikevich2007Cells: List[izhikevich2007Cell] = field(factory=list)
     pulseGenerators: List[pulseGenerator] = field(factory=list)
@@ -187,3 +194,8 @@ class neuroml(Base):
         yy = yaml.dump(doc_dict, indent=4, sort_keys=False)
         print(yy)
         d.write(yy)
+
+    from modelspec.utils import load_xml
+
+    new_neuroml = load_xml("hello_world_neuroml.net.nml")
+    print(new_neuroml)
diff --git a/src/modelspec/base_types.py b/src/modelspec/base_types.py
@@ -125,7 +125,6 @@ def to_xml(self) -> str:
             )
         from modelspec.utils import build_xml_element
 
-        # root = ET.Element("modelspec")
         root = build_xml_element(self)
 
         xml_string = ET.tostring(
@@ -170,12 +169,38 @@ def from_bson(cls, bson_str: str) -> "Base":
     @classmethod
     def from_xml(cls, xml_str: str) -> "Base":
         """Instantiate a Base object from an XML string"""
-        from modelspec.utils import element_to_dict, handle_id, convert_values
+        from modelspec.utils import (
+            elementtree_element_to_dict,
+            handle_xml_dict_id,
+            convert_xml_dict_values,
+            process_xml_namespace,
+        )
+        import re
+
+        # The string produced by the to_xml() method cannot be used as-is, therefore
+        # it is parsed into an ElementTree element and then serialized back into a decoded string.
+        xml_string_a = ET.fromstring(xml_str)
+        xml_string_b = ET.tostring(xml_string_a).decode()
+
+        # After the round trip above, the XML string contains generated namespace markers,
+        # numbered incrementally and appearing either as ":ns0" to ":nsX" or as "ns0:" to "nsX:".
+        # The regular expression below matches both forms and removes them from the XML string.
+        ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
+        cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string_b).strip()
+
+        # For the XML to be usable in modelspec, unnecessary string elements that serve only as aesthetics for the XML must
+        # be removed before converting to a dict; the process_xml_namespace function does just that.
+        removed_namespaces = process_xml_namespace(cleaned_xml)
+
+        # process_xml_namespace returns an ElementTree element that elementtree_element_to_dict can work on directly;
+        # that function returns a Python dictionary.
+        data_dict = elementtree_element_to_dict(removed_namespaces)
+
+        # This removes every 'id' key from the resulting dictionary, nesting the remaining entries under the id's value
+        removed_id = handle_xml_dict_id(data_dict)
 
-        root = ET.fromstring(xml_str)
-        data_dict = element_to_dict(root)
-        removed_id = handle_id(data_dict)
-        converted_to_actual_val = convert_values(removed_id)
+        # XML conversion does not return typed values; every value comes back as a string, so this restores the actual types
+        converted_to_actual_val = convert_xml_dict_values(removed_id)
 
         return cls.from_dict(converted_to_actual_val)
 
@@ -377,15 +402,38 @@ def from_xml_file(cls, filename: str) -> "Base":
         Returns:
             A modelspec Base for this XML.
         """
-        from modelspec.utils import element_to_dict, handle_id, convert_values
+        from modelspec.utils import (
+            elementtree_element_to_dict,
+            handle_xml_dict_id,
+            convert_xml_dict_values,
+            process_xml_namespace,
+        )
+        import re
 
         with open(filename) as infile:
-            tree = ET.parse(infile)
-            root = tree.getroot()
+            tree = ET.parse(infile)  # Parse the XML file into an ElementTree object
+            root = tree.getroot()  # Get the root element
+
+        # This defines regular expressions to match the namespace patterns to be removed
+        ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
+
+        # Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞:
+        # They prevent the xml from loading correctly
+        xml_string = ET.tostring(root).decode()
+        cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string).strip()
+
+        # Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion
+        # it passes an element tree object to the elementtree_element_to_dict function
+        removed_namespaces = process_xml_namespace(cleaned_xml)
+
+        # Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict
+        data_dict = elementtree_element_to_dict(removed_namespaces)
+
+        # Removes every 'id' key and replaces it with its value
+        removed_id = handle_xml_dict_id(data_dict)
 
-        data_dict = element_to_dict(root)
-        removed_id = handle_id(data_dict)
-        converted_to_actual_val = convert_values(removed_id)
+        # Values are returned as strings after conversion, this corrects them to their actual values
+        converted_to_actual_val = convert_xml_dict_values(removed_id)
         return cls.from_dict(converted_to_actual_val)
 
     def get_child(self, id: str, type_: str) -> Any:
diff --git a/src/modelspec/utils.py b/src/modelspec/utils.py
@@ -67,19 +67,35 @@ def load_xml(filename: str):
     Args:
         filename: The name of the XML file to load.
     """
+    import re
+
     with open(filename, "rb") as infile:
         tree = ET.parse(infile)  # Parse the XML file into an ElementTree object
         root = tree.getroot()  # Get the root element
 
-    # Convert the ElementTree object to a dictionary
-    data = element_to_dict(root)
-    removed_id = handle_id(data)
-    converted_to_actual_val = convert_values(removed_id)
+    # This defines regular expressions to match the namespace patterns to be removed
+    ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
+
+    # Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞:
+    # They prevent the xml from loading correctly
+    xml_string = ET.tostring(root).decode()
+    cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string).strip()
+
+    # Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion
+    # it passes an element tree object to the elementtree_element_to_dict function
+    removed_namespaces = process_xml_namespace(cleaned_xml)
+
+    # Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict
+    data = elementtree_element_to_dict(removed_namespaces)
 
-    return convert_values(converted_to_actual_val)
+    # Removes every 'id' key and replaces it with its value
+    removed_id = handle_xml_dict_id(data)
 
+    # Values are returned as strings after conversion, this corrects them to their actual values
+    return convert_xml_dict_values(removed_id)
 
-def element_to_dict(element):
+
+def elementtree_element_to_dict(element):
     """
     This convert an ElementTree element to a dictionary.
 
@@ -94,35 +110,57 @@ def element_to_dict(element):
     if attrs:
         result.update(attrs)
 
+    children_by_tag = {}
     for child_element in element:
-        child_key = child_element.tag
-        child_value = element_to_dict(child_element)
+        child_key = child_element.tag + "s"
+        child_value = elementtree_element_to_dict(child_element)
 
-        if child_key in result:
-            if not isinstance(result[child_key], list):
-                result[child_key] = [result[child_key]]
-            result[child_key].append(child_value)
-        else:
+        # Check if the child element has an 'id' attribute
+        if "id" in child_element.attrib:
+            # If the child element has an 'id', add it to the result dictionary directly
             result[child_key] = child_value
+        else:
+            # If the child element does not have an 'id', represent it as a list
+            children_by_tag.setdefault(child_key, []).append(child_value)
+
+    # Append the lists to the result dictionary
+    result.update(children_by_tag)
 
     return result
 
 
-def handle_id(dictionary):
+def process_xml_namespace(xml_string):
+    # Remove ignored elements from the XML string
+    ignored_elements = [
+        'xmlns="http://www.neuroml.org/schema/neuroml2"',
+        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
+        'xsi:schemaLocation="http://www.neuroml.org/schema/neuroml2 https://raw.github.com/NeuroML/NeuroML2/development/Schemas/NeuroML2/NeuroML_v2.3.xsd"',
+    ]
+
+    # Loops through ignored_elements and removes every occurrence of each entry from the XML string
+    for ignored_element in ignored_elements:
+        xml_string = xml_string.replace(ignored_element, "").strip()
+
+    # Parse the XML string into an ElementTree
+    root = ET.fromstring(xml_string)
+    return root
+
+
+def handle_xml_dict_id(dictionary):
     if isinstance(dictionary, dict):
         if "id" in dictionary:
             nested_dict = {dictionary["id"]: dictionary.copy()}
             del nested_dict[dictionary["id"]]["id"]
-            return {k: handle_id(v) for k, v in nested_dict.items()}
+            return {k: handle_xml_dict_id(v) for k, v in nested_dict.items()}
         else:
-            return {k: handle_id(v) for k, v in dictionary.items()}
+            return {k: handle_xml_dict_id(v) for k, v in dictionary.items()}
     elif isinstance(dictionary, list):
-        return [handle_id(item) for item in dictionary]
+        return [handle_xml_dict_id(item) for item in dictionary]
     else:
         return dictionary
 
 
-def convert_values(value):
+def convert_xml_dict_values(value):
     """
     This recursively converts values to their actual types.
 
@@ -146,9 +184,9 @@ def convert_values(value):
         elif value.lower() == "none":
             return None
     elif isinstance(value, dict):
-        return {key: convert_values(val) for key, val in value.items()}
+        return {key: convert_xml_dict_values(val) for key, val in value.items()}
     elif isinstance(value, list):
-        return [convert_values(item) for item in value]
+        return [convert_xml_dict_values(item) for item in value]
 
     return value
 
@@ -219,11 +257,20 @@ def build_xml_element(data, parent=None):
             for child in children:
                 child_element = build_xml_element(child)
                 parent.append(child_element)
-        else:
+
+        # Intended to filter out the namespace and schemaLocation attributes: any attribute whose default is a str is skipped here
+        elif not isinstance(aattr.default, str):
             attribute_name = aattr.name
             attribute_value = data.__getattribute__(aattr.name)
             parent.set(attribute_name, str(attribute_value))
 
+    # This defines the various namespaces and schemaLocation of the generated xml
+    if hasattr(data, "xmlns"):
+        parent.set("xmlns", data.xmlns)
+    if hasattr(data, "xmlns_xsi"):
+        parent.set("xmlns:xsi", data.xmlns_xsi)
+    if hasattr(data, "xmlns_loc"):
+        parent.set("xsi:schemaLocation", str(data.xmlns_loc))
     return parent