Skip to content

Commit a1e32fb

Browse files
authored
XML File Formatting (#1029)
* Using lxml instead of xml due to comment/ordering issues with formatter
* Formatting all xml files
* Making the xml attribute formatter more pythonic
* Updating the xml attribute formatter to have consistent indices
1 parent 1777c9d commit a1e32fb

File tree

1 file changed

+53
-11
lines changed

1 file changed

+53
-11
lines changed

pygeos_package/pygeos/xml_formatter.py

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
11
import argparse
22
import os
3-
from xml.etree import ElementTree
3+
from lxml import etree as ElementTree
4+
import re
5+
6+
7+
def format_attribute(attribute_indent, ka, attribute_value):
    """Normalize the spacing of a single xml attribute value

    Commas are followed by exactly one space, every brace is padded with a
    space on its inner side, and runs of whitespace are collapsed. A value
    that looks like a brace-wrapped list of brace-wrapped rows is then
    broken onto one line per row.

    @param attribute_indent the indent string in front of the attribute (only its length is used)
    @param ka the attribute name (only its length is used)
    @param attribute_value the raw attribute value string
    @return the formatted attribute value string
    """
    # Whitespace normalization passes; the order matters because the brace
    # passes can introduce doubled spaces that the last pass collapses.
    normalization_passes = (
        (r",\s*", ", "),    # a single space after every comma
        (r"{\s*", "{ "),    # a single space after every opening brace
        (r"\s*}", " }"),    # a single space before every closing brace
        (r"\s+", " "),      # collapse any remaining whitespace runs
    )
    for pattern, replacement in normalization_passes:
        attribute_value = re.sub(pattern, replacement, attribute_value)

    # Values that are not a list of brace-wrapped rows stay on one line
    if not re.match(r"\s*{\s*({[-+.,0-9a-zA-Z\s]*},?\s*)*\s*}", attribute_value):
        return attribute_value

    # Cut right after each "}," so every row (with its trailing comma) becomes
    # its own line; the None sentinels stand for the string start and end.
    cut_points = [hit.end() for hit in re.finditer(r"}\s*,", attribute_value)]
    row_starts = [None] + cut_points
    row_stops = cut_points + [None]
    rows = [attribute_value[lo:hi].strip() for lo, hi in zip(row_starts, row_stops)]

    # Continuation lines are indented by the attribute indent, the key length
    # and 4 extra columns (presumably the '="' and '{ ' that precede the first
    # row in the written output -- confirm against the caller).
    continuation = '\n%s' % (' ' * (len(attribute_indent) + len(ka) + 4))
    return continuation.join(rows)
429

530

631
def format_xml_level(output,
@@ -12,6 +37,19 @@ def format_xml_level(output,
1237
sort_attributes=False,
1338
close_tag_newline=False,
1439
include_namespace=False):
40+
"""Iteratively format the xml file
41+
42+
@param output the output filename
43+
@param node the current xml element
44+
@param level the xml depth
45+
@param indent the xml indent style
46+
@param block_separation_max_depth the maximum depth to separate adjacent elements
47+
@param modify_attribute_indent option to have flexible attribute indentation
48+
@param sort_attributes option to sort attributes alphabetically
49+
@param close_tag_newline option to place close tag on a separate line
50+
@param include_namespace option to include the xml namespace in the output
51+
"""
52+
1553
# Handle comments
1654
if node.tag is ElementTree.Comment:
1755
output.write('\n%s<!--%s-->' % (indent*level, node.text))
@@ -43,6 +81,10 @@ def format_xml_level(output,
4381
if sort_attributes:
4482
akeys = sorted(akeys)
4583

84+
# Format attributes
85+
for ka in akeys:
86+
attribute_dict[ka] = format_attribute(attribute_indent, ka, attribute_dict[ka])
87+
4688
for ii in range(0, len(akeys)):
4789
k = akeys[ii]
4890
if ((ii == 0) & modify_attribute_indent):
@@ -75,14 +117,6 @@ def format_xml_level(output,
75117
output.write('/>')
76118

77119

78-
# Class to handle commented xml structure
79-
class CommentedTreeBuilder(ElementTree.TreeBuilder):
80-
def comment(self, data):
81-
self.start(ElementTree.Comment, {})
82-
self.data(data)
83-
self.end(ElementTree.Comment)
84-
85-
86120
def main():
87121
"""Script to format xml files
88122
@@ -103,12 +137,17 @@ def main():
103137
# Process the xml file
104138
fname = os.path.expanduser(args.input)
105139
try:
106-
xml_parser = ElementTree.XMLParser(target=CommentedTreeBuilder())
107-
tree = ElementTree.parse(fname, xml_parser)
140+
tree = ElementTree.parse(fname)
108141
root = tree.getroot()
142+
prologue_comments = [tmp.text for tmp in root.itersiblings(preceding=True)]
143+
epilog_comments = [tmp.text for tmp in root.itersiblings()]
109144

110145
with open(fname, 'w') as f:
111146
f.write('<?xml version=\"1.0\" ?>\n')
147+
148+
for comment in reversed(prologue_comments):
149+
f.write('\n<!--%s-->' % (comment))
150+
112151
format_xml_level(f,
113152
root,
114153
0,
@@ -118,6 +157,9 @@ def main():
118157
sort_attributes=args.alphebitize,
119158
close_tag_newline=args.close,
120159
include_namespace=args.namespace)
160+
161+
for comment in epilog_comments:
162+
f.write('\n<!--%s-->' % (comment))
121163
f.write('\n')
122164

123165
except ElementTree.ParseError as err:

0 commit comments

Comments
 (0)