Skip to content

Commit 7f84497

Browse files
Relax HTML validation for Fortunes test
Some frameworks, such as Next.js, automatically inject `<meta>`, `<link>`, and `<script>` tags into pages rendered by their templating system. Since the Fortunes test is meant to exercise the templating system (as opposed to raw string concatenation), it should allow these tags. Furthermore, React warns against nesting a `<tr>` directly inside a `<table>` ("<tr> cannot be a child of <table>") because browsers will automatically wrap the `<tr>` elements in a `<tbody>`, causing a mismatch with the virtual DOM. Therefore, the Fortunes test should allow optional `<tbody>` (and `<thead>`) tags. This commit relaxes the HTML validation for the Fortunes test to allow these tags by simply ignoring them when building the comparison string.
1 parent ddd0952 commit 7f84497

File tree

1 file changed

+31
-16
lines changed

1 file changed

+31
-16
lines changed

toolset/test_types/fortune/fortune_html_parser.py

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,17 @@
88

99

1010
class FortuneHTMLParser(HTMLParser):
11+
IGNORED_TAGS = (
12+
"<meta>", "</meta>",
13+
"<link>", "</link>",
14+
"<script>", "</script>",
15+
"<thead>", "</thead>",
16+
"<tbody>", "</tbody>",
17+
)
18+
1119
def __init__(self):
1220
HTMLParser.__init__(self, convert_charrefs=False)
21+
self.ignore_content = False
1322
self.body = []
1423

1524
valid_fortune = '''<!doctype html><html>
@@ -41,7 +50,7 @@ def handle_decl(self, decl):
4150
# and since we did not specify xml compliance (where
4251
# incorrect casing would throw a syntax error), we must
4352
# allow all casings. We will lower for our normalization.
44-
self.body.append("<!{d}>".format(d=decl.lower()))
53+
self.append("<!{d}>".format(d=decl.lower()))
4554

4655
def handle_charref(self, name):
4756
'''
@@ -63,58 +72,58 @@ def handle_charref(self, name):
6372
# equality.
6473
if val == "34" or val == "034" or val == "x22":
6574
# Append our normalized entity reference to our body.
66-
self.body.append("&quot;")
75+
self.append("&quot;")
6776
# "&#39;" is a valid escaping of "'", but it is not
6877
# required, so we normalize for equality checking.
6978
if val == "39" or val == "039" or val == "x27":
70-
self.body.append("&apos;")
79+
self.append("&apos;")
7180
# Again, "&#43;" is a valid escaping of the "+", but
7281
# it is not required, so we need to normalize for our
7382
# final parse and equality check.
7483
if val == "43" or val == "043" or val == "x2b":
75-
self.body.append("+")
84+
self.append("+")
7685
# Again, "&#62;" is a valid escaping of ">", but we
7786
# need to normalize to "&gt;" for equality checking.
7887
if val == "62" or val == "062" or val == "x3e":
79-
self.body.append("&gt;")
88+
self.append("&gt;")
8089
# Again, "&#60;" is a valid escaping of "<", but we
8190
# need to normalize to "&lt;" for equality checking.
8291
if val == "60" or val == "060" or val == "x3c":
83-
self.body.append("&lt;")
92+
self.append("&lt;")
8493
# Not sure why some are escaping '/'
8594
if val == "47" or val == "047" or val == "x2f":
86-
self.body.append("/")
95+
self.append("/")
8796
# "&#40;" is a valid escaping of "(", but
8897
# it is not required, so we need to normalize for our
8998
# final parse and equality check.
9099
if val == "40" or val == "040" or val == "x28":
91-
self.body.append("(")
100+
self.append("(")
92101
# "&#41;" is a valid escaping of ")", but
93102
# it is not required, so we need to normalize for our
94103
# final parse and equality check.
95104
if val == "41" or val == "041" or val == "x29":
96-
self.body.append(")")
105+
self.append(")")
97106

98107
def handle_entityref(self, name):
99108
'''
100109
Again, "&mdash;" is a valid escaping of "—", but we
101110
need to normalize to "—" for equality checking.
102111
'''
103112
if name == "mdash":
104-
self.body.append("—")
113+
self.append("—")
105114
else:
106-
self.body.append("&{n};".format(n=name))
115+
self.append("&{n};".format(n=name))
107116

108117
def handle_starttag(self, tag, attrs):
109118
'''
110119
This is called every time a tag is opened. We append
111120
each one wrapped in "<" and ">".
112121
'''
113-
self.body.append("<{t}>".format(t=tag))
122+
self.append("<{t}>".format(t=tag))
114123

115124
# Append a newline after the <table> and <html>
116125
if tag.lower() == 'table' or tag.lower() == 'html':
117-
self.body.append(os.linesep)
126+
self.append(os.linesep)
118127

119128
def handle_data(self, data):
120129
'''
@@ -146,18 +155,24 @@ def handle_data(self, data):
146155
data = data.replace('"', '&quot;')
147156
data = data.replace('>', '&gt;')
148157

149-
self.body.append("{d}".format(d=data))
158+
self.append("{d}".format(d=data))
150159

151160
def handle_endtag(self, tag):
152161
'''
153162
This is called every time a tag is closed. We append
154163
each one wrapped in "</" and ">".
155164
'''
156-
self.body.append("</{t}>".format(t=tag))
165+
self.append("</{t}>".format(t=tag))
157166

158167
# Append a newline after each </tr> and </head>
159168
if tag.lower() == 'tr' or tag.lower() == 'head':
160-
self.body.append(os.linesep)
169+
self.append(os.linesep)
170+
171+
def append(self, item):
172+
self.ignore_content = item == "<script>" or (self.ignore_content and item != "</script>")
173+
174+
if not (self.ignore_content or item in self.IGNORED_TAGS):
175+
self.body.append(item)
161176

162177
def isValidFortune(self, name, out):
163178
'''

0 commit comments

Comments
 (0)