@@ -169,11 +169,37 @@ def from_bson(cls, bson_str: str) -> "Base":
169
169
@classmethod
def from_xml(cls, xml_str: str) -> "Base":
    """Instantiate a Base object from an XML string.

    The string is parsed, re-serialised, stripped of the namespace prefixes
    that ElementTree generates, converted to a dictionary and finally handed
    to ``cls.from_dict``.

    Args:
        xml_str: The XML document, as a string.

    Returns:
        Whatever ``cls.from_dict`` builds from the converted dictionary.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_str`` is not well-formed XML.
    """
    import re

    from modelspec.utils import (
        element_to_dict,
        handle_id,
        convert_values,
        process_xml_namespace,
    )

    # Round-trip through ElementTree to normalise the text: strings produced
    # by to_xml() are not directly usable, so parse and serialise them again.
    parsed_root = ET.fromstring(xml_str)
    serialised_xml = ET.tostring(parsed_root).decode()

    # Serialising a namespaced document makes ElementTree invent prefixes
    # (ns0, ns1, ...). They show up as "ns0:" in front of tag/attribute names
    # and as ":ns0" in "xmlns:ns0" declarations. The pattern is anchored to
    # exactly those positions: an unanchored r"(ns\d+:|:ns\d+)" would also eat
    # ordinary content such as "columns12:" or "host:ns1" inside text or
    # attribute values.
    ns_prefix_pattern = r"((?<![\w.\-])ns\d+:|(?<=xmlns):ns\d+)"
    cleaned_xml = re.sub(ns_prefix_pattern, "", serialised_xml).strip()

    # process_xml_namespace drops the purely decorative namespace attributes
    # and returns an ElementTree element that element_to_dict can consume
    # directly (per the helper's usage here; its body is not shown).
    removed_namespaces = process_xml_namespace(cleaned_xml)

    # Convert the namespace-free element tree into a plain Python dictionary.
    data_dict = element_to_dict(removed_namespaces)

    # Strip every instance of 'id' from the resulting dictionary structure.
    removed_id = handle_id(data_dict)

    # XML carries every value as a string; restore the actual value types.
    converted_to_actual_val = convert_values(removed_id)

    return cls.from_dict(converted_to_actual_val)
@@ -376,14 +402,37 @@ def from_xml_file(cls, filename: str) -> "Base":
376
402
Returns:
377
403
A modelspec Base for this XML.
378
404
"""
379
- from modelspec .utils import element_to_dict , handle_id , convert_values
405
+ from modelspec .utils import (
406
+ element_to_dict ,
407
+ handle_id ,
408
+ convert_values ,
409
+ process_xml_namespace ,
410
+ )
411
+ import re
380
412
381
413
with open (filename ) as infile :
382
- tree = ET .parse (infile )
383
- root = tree .getroot ()
414
+ tree = ET .parse (infile ) # Parse the XML file into an ElementTree object
415
+ root = tree .getroot () # Get the root element
416
+
417
+ # This defines regular expressions to match the namespace patterns to be removed
418
+ ns_prefix_pattern = r"(ns\d+:|:ns\d+)"
419
+
420
+ # Converts the loaded xml into a string and removes unwanted string values ':ns0' to :ns∞ and 'ns0:' to ns∞:
421
+ # They prevent the xml from loading correctly
422
+ xml_string = ET .tostring (root ).decode ()
423
+ cleaned_xml = re .sub (ns_prefix_pattern , "" , xml_string ).strip ()
384
424
385
- data_dict = element_to_dict (root )
425
+ # Removes xmlns, xmlns:xsi and xsi:schemaLocation from the xml structure for conversion
426
+ # it passes an element tree object to the element_to_dict function
427
+ removed_namespaces = process_xml_namespace (cleaned_xml )
428
+
429
+ # Converts the resulting xml stripped of xmlns, xmlns:xsi and xsi:schemaLocation into a dict
430
+ data_dict = element_to_dict (removed_namespaces )
431
+
432
+ # Removes every key having 'id' and replaces it with it's value
386
433
removed_id = handle_id (data_dict )
434
+
435
+ # Values are returned as strings after conversion, this corrects them to their actual values
387
436
converted_to_actual_val = convert_values (removed_id )
388
437
return cls .from_dict (converted_to_actual_val )
389
438
0 commit comments