11import argparse
22import os
3- from xml .etree import ElementTree
3+ from lxml import etree as ElementTree
4+ import re
5+
6+
def format_attribute(attribute_indent, ka, attribute_value):
    """Normalize the spacing of one xml attribute value

    Commas are followed by a single space, braces are padded with a single
    space on the inside, and runs of whitespace are collapsed.  A value that
    looks like a brace-wrapped list of brace groups is additionally broken
    onto one line per group.

    @param attribute_indent the attribute indent (only its length is used, to pad continuation lines)
    @param ka the attribute key (only its length is used, to pad continuation lines)
    @param attribute_value the attribute value to format
    @return the formatted attribute value
    """
    # Ordered clean-up passes: comma spacing, opening braces, closing braces,
    # and finally whitespace consolidation (order matters, the last pass
    # squeezes any doubled spaces left by the earlier ones)
    cleanup_passes = (
        (r",\s*", ", "),
        (r"{\s*", "{ "),
        (r"\s*}", " }"),
        (r"\s+", " "),
    )
    formatted = attribute_value
    for pattern, replacement in cleanup_passes:
        formatted = re.sub(pattern, replacement, formatted)

    # Only nested-brace values are split; everything else stays on one line
    if not re.match(r"\s*{\s*({[-+.,0-9a-zA-Z\s]*},?\s*)*\s*}", formatted):
        return formatted

    # Cut right after every "}," so each inner group lands on its own line
    cuts = [found.end() for found in re.finditer(r"}\s*,", formatted)]
    padding = ' ' * (len(attribute_indent) + len(ka) + 4)
    separator = '\n %s' % (padding)
    pieces = [formatted[start:stop].strip()
              for start, stop in zip([None] + cuts, cuts + [None])]
    return separator.join(pieces)
429
530
631def format_xml_level (output ,
@@ -12,6 +37,19 @@ def format_xml_level(output,
1237 sort_attributes = False ,
1338 close_tag_newline = False ,
1439 include_namespace = False ):
40+ """Iteratively format the xml file
41+
42+ @param output the writable output stream (an open file handle)
43+ @param node the current xml element
44+ @param level the xml depth
45+ @param indent the xml indent style
46+ @param block_separation_max_depth the maximum depth to separate adjacent elements
47+ @param modify_attribute_indent option to have flexible attribute indentation
48+ @param sort_attributes option to sort attributes alphabetically
49+ @param close_tag_newline option to place close tag on a separate line
50+ @param include_namespace option to include the xml namespace in the output
51+ """
52+
1553 # Handle comments
1654 if node .tag is ElementTree .Comment :
1755 output .write ('\n %s<!--%s-->' % (indent * level , node .text ))
@@ -43,6 +81,10 @@ def format_xml_level(output,
4381 if sort_attributes :
4482 akeys = sorted (akeys )
4583
84+ # Format attributes
85+ for ka in akeys :
86+ attribute_dict [ka ] = format_attribute (attribute_indent , ka , attribute_dict [ka ])
87+
4688 for ii in range (0 , len (akeys )):
4789 k = akeys [ii ]
4890 if ((ii == 0 ) & modify_attribute_indent ):
@@ -75,14 +117,6 @@ def format_xml_level(output,
75117 output .write ('/>' )
76118
77119
78- # Class to handle commented xml structure
79- class CommentedTreeBuilder (ElementTree .TreeBuilder ):
80- def comment (self , data ):
81- self .start (ElementTree .Comment , {})
82- self .data (data )
83- self .end (ElementTree .Comment )
84-
85-
86120def main ():
87121 """Script to format xml files
88122
@@ -103,12 +137,17 @@ def main():
103137 # Process the xml file
104138 fname = os .path .expanduser (args .input )
105139 try :
106- xml_parser = ElementTree .XMLParser (target = CommentedTreeBuilder ())
107- tree = ElementTree .parse (fname , xml_parser )
140+ tree = ElementTree .parse (fname )
108141 root = tree .getroot ()
142+ prologue_comments = [tmp .text for tmp in root .itersiblings (preceding = True )]
143+ epilog_comments = [tmp .text for tmp in root .itersiblings ()]
109144
110145 with open (fname , 'w' ) as f :
111146 f .write ('<?xml version=\" 1.0\" ?>\n ' )
147+
148+ for comment in reversed (prologue_comments ):
149+ f .write ('\n <!--%s-->' % (comment ))
150+
112151 format_xml_level (f ,
113152 root ,
114153 0 ,
@@ -118,6 +157,9 @@ def main():
118157 sort_attributes = args .alphebitize ,
119158 close_tag_newline = args .close ,
120159 include_namespace = args .namespace )
160+
161+ for comment in epilog_comments :
162+ f .write ('\n <!--%s-->' % (comment ))
121163 f .write ('\n ' )
122164
123165 except ElementTree .ParseError as err :
0 commit comments