Skip to content

Commit e38da23

Browse files
committed
Make sure to preserve CDATA blocks in merging
1 parent 0cf6cea commit e38da23

File tree

1 file changed

+37
-2
lines changed

1 file changed

+37
-2
lines changed

edm4hep/schema_evolution/merge_selection_xml.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,31 @@ def escape_name_content(match):
1414
return f'name="{escaped_value}"'
1515

1616

17+
def preserve_cdata_sections(content, *, start_index=0):
    """Replace CDATA sections with placeholders to preserve them through XML parsing.

    Every ``<![CDATA[ ... ]]>`` section in *content* (including ones that span
    several lines) is swapped for the text ``CDATA_PLACEHOLDER_<n>``, and the
    original section text, delimiters included, is collected in order.

    Args:
        content: The raw XML text.
        start_index: Number used for the first placeholder. The default of 0
            keeps the original behaviour. When several documents are processed
            separately and their section lists are later concatenated for
            restore_cdata_sections (which maps placeholder ``n`` to item ``n``
            of the list it receives), pass the number of sections already
            collected so that placeholders from different documents do not
            collide.

    Returns:
        A ``(processed_content, cdata_sections)`` tuple: the text with the
        placeholders in place, and the list of removed CDATA sections.
    """
    cdata_sections = []

    def replace_cdata(match):
        cdata_sections.append(match.group(0))
        # The placeholder number is the section's position in the list,
        # shifted by the caller-supplied offset.
        return f"CDATA_PLACEHOLDER_{start_index + len(cdata_sections) - 1}"

    # Non-greedy match with DOTALL so each section ends at its own "]]>" and
    # may contain newlines.
    processed_content = re.sub(
        r"<!\[CDATA\[.*?\]\]>", replace_cdata, content, flags=re.DOTALL
    )
    return processed_content, cdata_sections
29+
30+
31+
def restore_cdata_sections(content, cdata_sections):
    """Restore CDATA sections from placeholders.

    Each occurrence of ``CDATA_PLACEHOLDER_<n>`` in *content* is replaced by
    ``cdata_sections[n]``.

    Args:
        content: Text containing ``CDATA_PLACEHOLDER_<n>`` markers.
        cdata_sections: The original CDATA section strings, indexed by
            placeholder number.

    Returns:
        The text with every known placeholder replaced by its CDATA section.
    """
    placeholder_pattern = "CDATA_PLACEHOLDER_{}"

    # Substitute from the highest index down: "CDATA_PLACEHOLDER_1" is a prefix
    # of "CDATA_PLACEHOLDER_10", so going in ascending order would rewrite the
    # front of the longer placeholders and leave a stray digit behind.
    for i in range(len(cdata_sections) - 1, -1, -1):
        content = content.replace(placeholder_pattern.format(i), cdata_sections[i])

    return content
40+
41+
1742
def escape_xml_content(content):
1843
"""Escape < and > characters in XML content while preserving XML structure"""
1944
lines = content.split("\n")
@@ -43,9 +68,14 @@ def unescape_xml_output(content):
4368
podio_gen_file, manual_file, output_file = sys.argv[1:]
4469

4570
with open(podio_gen_file, "r", encoding="utf-8") as f:
46-
gen_content = escape_xml_content(f.read())
71+
gen_content, gen_cdata = preserve_cdata_sections(f.read())
72+
4773
with open(manual_file, "r", encoding="utf-8") as f:
48-
manual_content = escape_xml_content(f.read())
74+
manual_content, manual_cdata = preserve_cdata_sections(f.read())
75+
76+
# Escape XML content for parsing
77+
gen_content = escape_xml_content(gen_content)
78+
manual_content = escape_xml_content(manual_content)
4979

5080
gen_tree = ET.fromstring(gen_content)
5181
manual_tree = ET.fromstring(manual_content)
@@ -57,5 +87,10 @@ def unescape_xml_output(content):
5787

5888
ET.indent(gen_tree, space=" ", level=0)
5989
output_content = unescape_xml_output(ET.tostring(gen_tree, encoding="unicode"))
90+
91+
# Restore CDATA sections in the final output
92+
all_cdata = gen_cdata + manual_cdata
93+
output_content = restore_cdata_sections(output_content, all_cdata)
94+
6095
with open(output_file, "w", encoding="utf-8") as f:
6196
f.write(output_content)

0 commit comments

Comments
 (0)