Skip to content

Commit e377eab

Browse files
Don't escape contents of script tags
1 parent 0d44a39 commit e377eab

File tree

7 files changed

+78
-5
lines changed

7 files changed

+78
-5
lines changed

meta/generate_tag_defs.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212

1313
TEMPLATES_FOLDER = Path('./meta/templates')
1414

15+
NO_ESCAPE_CHILDREN = """
16+
def _escape_children(self) -> bool:
17+
return False
18+
"""
19+
1520

1621
def get_template_class(name: str):
1722
try:
@@ -86,6 +91,11 @@ def generate_tag_class(output: TextIO, tag: TagInfo):
8691
.replace("{default_attrs}", default_attrs)
8792

8893
print(text, file=output)
94+
95+
# And add the no escape children function if needed
96+
if not tag.escape_children:
97+
print(NO_ESCAPE_CHILDREN, file=output)
98+
8999
# And a nice trailing newline to make flake8 happy
90100
print(file=output)
91101

meta/scrape_tags.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ class TagsYmlItem(TypedDict):
133133
rename: NotRequired[str]
134134
"""Value to rename the class to (to avoid bad keyword usage)"""
135135

136+
escape_children: NotRequired[bool]
137+
"""Whether to escape the contents of the tag (default True)"""
138+
136139

137140
TagsYaml = dict[str, TagsYmlItem]
138141
"""Type alias for type of tags.yml file"""
@@ -191,6 +194,11 @@ class TagInfo:
191194
Link to full documentation on MDN
192195
"""
193196

197+
escape_children: bool
198+
"""
199+
Whether to escape child elements for the tag
200+
"""
201+
194202
attributes: list[Attr]
195203
"""
196204
List of attributes and their documentation.
@@ -431,6 +439,16 @@ def get_tag_skip(tags: TagsYaml, tag_name: str) -> bool:
431439
return tag.get('skip', False)
432440

433441

442+
def get_tag_escape_children(tags: TagsYaml, tag_name: str) -> bool:
    """
    Return whether the contents of this tag should be escaped

    Tags that have no entry in the tags data, and entries that do not set
    `escape_children`, default to `True` (contents are escaped).
    """
    # Tags without any overrides keep the default behaviour
    if tag_name not in tags:
        return True
    tag = tags[tag_name]
    return tag.get('escape_children', True)
450+
451+
434452
def make_mdn_link(tag: str) -> str:
435453
"""Generate an MDN docs link for the given tag"""
436454
return f"{MDN_ELEMENT_PAGE}/{tag}"
@@ -455,6 +473,7 @@ def elements_to_element_structs(
455473
description=description,
456474
base=get_tag_base_class(tag_attrs, name),
457475
mdn_link=make_mdn_link(name),
476+
escape_children=get_tag_escape_children(tag_attrs, name),
458477
attributes=attr_entries_to_object(tag_attrs, name),
459478
))
460479

meta/tags.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ a:
3232
type: "Union[str, Literal['_self', '_blank', '_parent', '_top'], None]"
3333

3434
script:
35+
escape_children: false
3536
attributes:
3637
type:
3738
doc: Type of script to use

pyhtml/__tag_base.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,17 @@ def _get_default_attributes(
7474
"""
7575
return {}
7676

77+
def _escape_children(self) -> bool:
78+
"""
79+
Returns whether the contents of the element should be escaped, or
80+
rendered plainly.
81+
82+
By default, all string content should be escaped to prevent security
83+
vulnerabilities such as XSS, but this is disabled for certain tags such
84+
as <script>.
85+
"""
86+
return True
87+
7788
def _render(self) -> list[str]:
7889
"""
7990
Renders tag and its children to a list of strings where each string is
@@ -97,7 +108,9 @@ def _render(self) -> list[str]:
97108
else:
98109
out = [opening]
99110
# Children
100-
out.extend(util.render_children(self.children))
111+
out.extend(
112+
util.render_children(self.children, self._escape_children())
113+
)
101114
# Closing tag
102115
out.append(f"</{self._get_tag_name()}>")
103116

pyhtml/__tags/generated.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4119,6 +4119,10 @@ def _get_default_attributes(self, given: dict[str, AttributeType]) -> dict[str,
41194119
return {'type': 'text/javascript'}
41204120

41214121

4122+
def _escape_children(self) -> bool:
4123+
return False
4124+
4125+
41224126
class del_(Tag):
41234127
"""
41244128
Represents a range of text that has been deleted from a document. This can be used when rendering "track changes" or source code diff information, for example. The `<ins>` element can be used for the opposite purpose: to indicate text that has been added to the document.

pyhtml/__util.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@ def filter_attributes(attributes: dict[str, Any]) -> dict[str, Any]:
9696
}
9797

9898

99-
def render_inline_element(ele: ChildElementType) -> list[str]:
99+
def render_inline_element(
100+
ele: ChildElementType,
101+
escape_strings: bool,
102+
) -> list[str]:
100103
"""
101104
Render an element inline
102105
"""
@@ -107,18 +110,24 @@ def render_inline_element(ele: ChildElementType) -> list[str]:
107110
return ele()._render()
108111
else:
109112
# Remove newlines from strings when inline rendering
110-
return [escape_string(str(ele))]
113+
if escape_strings:
114+
return [escape_string(str(ele))]
115+
else:
116+
return [str(ele)]
111117

112118

113-
def render_children(children: list[ChildElementType]) -> list[str]:
119+
def render_children(
120+
children: list[ChildElementType],
121+
escape_strings: bool,
122+
) -> list[str]:
114123
"""
115124
Render child elements of tags.
116125
117126
Elements are placed in the same string
118127
"""
119128
rendered = []
120129
for ele in children:
121-
rendered.extend(render_inline_element(ele))
130+
rendered.extend(render_inline_element(ele, escape_strings))
122131
return increase_indent(rendered, 2)
123132

124133

tests/script_tag_test.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""
2+
# Tests / script tag test
3+
4+
Test cases for the script tag
5+
"""
6+
from pyhtml import script
7+
8+
9+
def test_script_not_escaped():
10+
"""
11+
Contents of script are not escaped.
12+
"""
13+
assert str(script("<>'\";&")) == "\n".join([
14+
"<script type=\"text/javascript\">",
15+
" <>'\";&",
16+
"</script>",
17+
])

0 commit comments

Comments
 (0)