@@ -14,6 +14,31 @@ def escape_name_content(match):
1414 return f'name="{ escaped_value } "'
1515
1616
def preserve_cdata_sections(content):
    """Swap every CDATA section in *content* for a numbered placeholder.

    Returns a ``(text, sections)`` tuple: ``text`` is *content* with each
    ``<![CDATA[...]]>`` span replaced by ``CDATA_PLACEHOLDER_<n>``, and
    ``sections`` lists the removed spans (delimiters included) so that
    ``sections[n]`` is the text that placeholder ``n`` stands for.
    Numbering starts at 0 and follows document order.
    """
    # DOTALL lets a section span several lines; the lazy quantifier stops
    # at the first closing "]]>" so neighbouring sections are not merged.
    cdata_regex = re.compile(r"<!\[CDATA\[.*?\]\]>", flags=re.DOTALL)
    stashed = []

    def stash_and_tag(found):
        # The slot index is taken before appending, so it equals the
        # position the section will occupy in the returned list.
        slot = len(stashed)
        stashed.append(found.group(0))
        return "CDATA_PLACEHOLDER_{}".format(slot)

    return cdata_regex.sub(stash_and_tag, content), stashed
29+
30+
def restore_cdata_sections(content, cdata_sections):
    """Replace ``CDATA_PLACEHOLDER_<n>`` tokens with ``cdata_sections[n]``.

    All placeholders are expanded in a single pass over *content*, so:

    * ``CDATA_PLACEHOLDER_1`` is never matched as a prefix of
      ``CDATA_PLACEHOLDER_10`` (which a sequence of plain ``str.replace``
      calls would do once there are more than ten sections), and
    * text coming from a restored section is not scanned again, so a CDATA
      body that happens to mention a placeholder is left untouched.

    Tokens whose index has no entry in *cdata_sections* are left as they
    are. Returns the resulting string; *content* is returned unchanged when
    *cdata_sections* is empty.
    """
    total = len(cdata_sections)
    if not total:
        return content

    def expand(match):
        digits = match.group(1)
        # A placeholder may be directly followed by ordinary digits in the
        # document. Prefer the longest index that actually exists and give
        # the remaining digits back as plain text.
        for end in range(len(digits), 0, -1):
            index = int(digits[:end])
            if index < total:
                return cdata_sections[index] + digits[end:]
        return match.group(0)

    # Indices are generated without leading zeros, so "0" followed by more
    # digits is index 0 plus trailing text rather than a larger number.
    return re.sub(r"CDATA_PLACEHOLDER_(0|[1-9]\d*)", expand, content)
40+
41+
1742def escape_xml_content (content ):
1843 """Escape < and > characters in XML content while preserving XML structure"""
1944 lines = content .split ("\n " )
@@ -43,9 +68,14 @@ def unescape_xml_output(content):
podio_gen_file, manual_file, output_file = sys.argv[1:]

# Pull CDATA sections out of both inputs before any escaping or parsing so
# their contents reach the output exactly as written.
with open(podio_gen_file, "r", encoding="utf-8") as f:
    gen_content, gen_cdata = preserve_cdata_sections(f.read())

with open(manual_file, "r", encoding="utf-8") as f:
    manual_content, manual_cdata = preserve_cdata_sections(f.read())

# Each preserve_cdata_sections call numbers its placeholders from 0, but the
# output step restores from gen_cdata + manual_cdata. Move the manual
# placeholders past the generated ones so that, after the trees are merged,
# every placeholder index points at its own section in the combined list.
if gen_cdata and manual_cdata:
    manual_content = re.sub(
        r"CDATA_PLACEHOLDER_(\d+)",
        lambda m: (
            f"CDATA_PLACEHOLDER_{int(m.group(1)) + len(gen_cdata)}"
            if int(m.group(1)) < len(manual_cdata)
            else m.group(0)  # not one of our placeholders; leave it alone
        ),
        manual_content,
    )

# Escape XML content for parsing
gen_content = escape_xml_content(gen_content)
manual_content = escape_xml_content(manual_content)

gen_tree = ET.fromstring(gen_content)
manual_tree = ET.fromstring(manual_content)
@@ -57,5 +87,10 @@ def unescape_xml_output(content):
5787
# Re-indent the merged tree in place, serialize it to text, and undo the
# escaping that was applied before parsing.
ET.indent(gen_tree, space=" ", level=0)
serialized_tree = ET.tostring(gen_tree, encoding="unicode")
output_content = unescape_xml_output(serialized_tree)

# Put the original CDATA sections back; placeholders index into the
# generated-file sections followed by the manual-file sections.
all_cdata = gen_cdata + manual_cdata
output_content = restore_cdata_sections(output_content, all_cdata)

with open(output_file, "w", encoding="utf-8") as f:
    f.write(output_content)
0 commit comments