Skip to content

Commit ed0c66e

Browse files
extended new namespace support handling to from_xml and from_xml_file methods
1 parent 5371939 commit ed0c66e

File tree

1 file changed

+56
-7
lines changed

1 file changed

+56
-7
lines changed

src/modelspec/base_types.py

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,37 @@ def from_bson(cls, bson_str: str) -> "Base":
169169
@classmethod
170170
def from_xml(cls, xml_str: str) -> "Base":
171171
"""Instantiate a Base object from an XML string"""
172-
from modelspec.utils import element_to_dict, handle_id, convert_values
172+
from modelspec.utils import (
173+
element_to_dict,
174+
handle_id,
175+
convert_values,
176+
process_xml_namespace,
177+
)
178+
import re
179+
180+
# When the to_xml() method is used it messes up the string therefore,
181+
# it is necessary to convert it into an ElementTree element and then decode it back into a string.
182+
xml_string_a = ET.fromstring(xml_str)
183+
xml_string_b = ET.tostring(xml_string_a).decode()
184+
185+
# while trying to obtain a useable xml structure, using the conversion above it acquires
186+
# some unusual string element that sometimes can be incremental from either :ns0 to :nsX or ns0: to nsX:.
187+
# The regex pattern below matches it in either form so that it can be removed from the XML string.
188+
ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
189+
cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string_b).strip()
173190

174-
root = ET.fromstring(xml_str)
175-
data_dict = element_to_dict(root)
191+
# For the xml to be useable in modelspec unnecessary string elements which only serve as aesthetics for the xml must
192+
# be removed when converting to a dict; the process_xml_namespace function does just that.
193+
removed_namespaces = process_xml_namespace(cleaned_xml)
194+
195+
# process_xml_namespace function returns an elementtree object which can be directly worked upon by the element_to_dict
196+
# function, this returns a python dictionary
197+
data_dict = element_to_dict(removed_namespaces)
198+
199+
# This strips every instance of 'id' from the resulting dictionary structure
176200
removed_id = handle_id(data_dict)
201+
202+
# XML conversions do not return exact values; instead all values are returned as strings, so this reassigns their actual values
177203
converted_to_actual_val = convert_values(removed_id)
178204

179205
return cls.from_dict(converted_to_actual_val)
@@ -376,14 +402,37 @@ def from_xml_file(cls, filename: str) -> "Base":
376402
Returns:
377403
A modelspec Base for this XML.
378404
"""
379-
from modelspec.utils import element_to_dict, handle_id, convert_values
405+
from modelspec.utils import (
406+
element_to_dict,
407+
handle_id,
408+
convert_values,
409+
process_xml_namespace,
410+
)
411+
import re
380412

381413
with open(filename) as infile:
382-
tree = ET.parse(infile)
383-
root = tree.getroot()
414+
tree = ET.parse(infile) # Parse the XML file into an ElementTree object
415+
root = tree.getroot() # Get the root element
416+
417+
# This defines regular expressions to match the namespace patterns to be removed
418+
ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
419+
420+
# Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞:
421+
# They prevent the xml from loading correctly
422+
xml_string = ET.tostring(root).decode()
423+
cleaned_xml = re.sub(ns_prefix_pattern, "", xml_string).strip()
384424

385-
data_dict = element_to_dict(root)
425+
# Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion
426+
# it passes an element tree object to the element_to_dict function
427+
removed_namespaces = process_xml_namespace(cleaned_xml)
428+
429+
# Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict
430+
data_dict = element_to_dict(removed_namespaces)
431+
432+
# Removes every key having 'id' and replaces it with its value
386433
removed_id = handle_id(data_dict)
434+
435+
# Values are returned as strings after conversion, this corrects them to their actual values
387436
converted_to_actual_val = convert_values(removed_id)
388437
return cls.from_dict(converted_to_actual_val)
389438

0 commit comments

Comments
 (0)