Skip to content

Commit c64fed4

Browse files
committed
Bye bye lxml, hello HTMLParser
1 parent 465c572 commit c64fed4

File tree

2 files changed

+28
-13
lines changed

2 files changed

+28
-13
lines changed

xlsformconverter/XLSFormConverter.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
22
import re
33
import unicodedata
4+
from html.parser import HTMLParser
5+
from io import StringIO
46

5-
from lxml import html
67
from qgis.core import (
78
Qgis,
89
QgsAttributeEditorContainer,
@@ -40,6 +41,27 @@
4041
MARKDOWN_AVAILABLE = False
4142

4243

44+
class HTMLStripper(HTMLParser):
    """HTML parser that discards markup and accumulates the text content.

    Typical use: ``feed()`` the markup, ``close()`` the parser so any text it
    is still buffering gets flushed, then read the result with ``get_data()``.
    """

    def __init__(self):
        # convert_charrefs=True makes the parser decode character references
        # (e.g. "&amp;") before passing text to handle_data(). The base
        # constructor already calls reset(), so no explicit reset is needed.
        super().__init__(convert_charrefs=True)
        self.text = StringIO()

    def handle_data(self, d):
        """Append a run of text found outside of tags to the buffer."""
        self.text.write(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return self.text.getvalue()


def strip_tags(html):
    """Return ``html`` with all markup removed and character references decoded.

    Args:
        html: The string to strip; plain text without any tags is accepted.

    Returns:
        The text content of ``html`` as a string.
    """
    s = HTMLStripper()
    s.feed(html)
    # With convert_charrefs enabled, feed() holds back trailing text that
    # contains an "&" not followed by whitespace or ";" (it could be the start
    # of a character reference split across chunks). close() tells the parser
    # the input is complete so that text is emitted instead of being dropped.
    s.close()
    return s.get_data()
63+
64+
4365
class XLSFormConverter(QObject):
4466
survey_layer = None
4567
choices_layer = None
@@ -246,9 +268,7 @@ def create_field(self, feature):
246268
if feature.attribute(self.survey_label_index)
247269
else field_name
248270
)
249-
html_fragment = html.fromstring(field_alias)
250-
field_alias = html_fragment.text_content()
251-
del html_fragment
271+
field_alias = strip_tags(field_alias)
252272

253273
field_type = None
254274
field = None
@@ -1042,9 +1062,7 @@ def convert(
10421062
output_lists_field_names.index(field_name)
10431063
)
10441064
if field_name == self.label_field_name:
1045-
html_fragment = html.fromstring(str(attribute_value))
1046-
attribute_value = html_fragment.text_content()
1047-
del html_fragment
1065+
attribute_value = strip_tags(str(attribute_value))
10481066
output_feature.setAttribute(field_name, attribute_value)
10491067
output_lists_sink.addFeature(output_feature)
10501068

@@ -1155,9 +1173,7 @@ def convert(
11551173
relation.id(),
11561174
current_editor_form[-2].invisibleRootContainer(),
11571175
)
1158-
html_fragment = html.fromstring(feature_label)
1159-
feature_label = html_fragment.text_content()
1160-
del html_fragment
1176+
feature_label = strip_tags(feature_label)
11611177
editor_relation.setLabel(feature_label)
11621178
editor_relation.setShowLabel(feature_label != "")
11631179
if relevant_container:
@@ -1172,9 +1188,7 @@ def convert(
11721188
current_layer.pop()
11731189
current_editor_form.pop()
11741190
elif feature_type == "begin group" or feature_type == "begin_group":
1175-
html_fragment = html.fromstring(feature_label)
1176-
feature_label = html_fragment.text_content()
1177-
del html_fragment
1191+
feature_label = strip_tags(feature_label)
11781192
current_container.append(
11791193
QgsAttributeEditorContainer(feature_label, current_container[-1])
11801194
)

xlsformconverter/metadata.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ repository=https://github.com/opengisch/XLSFormConverter
2020
homepage=https://github.com/opengisch/XLSFormConverter
2121

2222
changelog=
23+
v1.0.2 - The lxml library shipped on Windows is too unstable; use HTMLParser to strip tags instead
2324
v1.0.1 - Qt6 support, windows crasher fix
2425
v1.0.0 - Initial release
2526

0 commit comments

Comments
 (0)