Skip to content

Commit 5b517d7

Browse files
committed
fix: Serialize all HTML attributes without escaping
Ref: #184
1 parent 0fd46d0 commit 5b517d7

File tree

6 files changed

+178
-24
lines changed

6 files changed

+178
-24
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Fixed
6+
7+
- Serialize all HTML attributes without escaping. [#184](https://github.com/Stranger6667/css-inline/issues/184)
8+
59
### Internal
610

711
- Replaced the `kuchiki` crate with our custom-built HTML tree representation. [#176](https://github.com/Stranger6667/css-inline/issues/176)

bindings/python/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Fixed
6+
7+
- Serialize all HTML attributes without escaping. [#184](https://github.com/Stranger6667/css-inline/issues/184)
8+
59
### Changed
610

711
- Update `PyO3` to `0.19.0`.

bindings/wasm/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
## [Unreleased]
44

5+
### Fixed
6+
7+
- Serialize all HTML attributes without escaping. [#184](https://github.com/Stranger6667/css-inline/issues/184)
8+
59
### Performance
610

711
- 15-30% average performance improvement due to the switch from `kuchiki` to a custom-built HTML tree representation.

css-inline/src/html/iter.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use super::{
77
};
88

99
/// Compile selectors from a string and create an element iterator that yields elements matching these selectors.
10+
#[inline]
1011
pub(crate) fn select<'a, 'b>(
1112
document: &'a Document,
1213
selectors: &'b str,
@@ -59,6 +60,7 @@ pub(crate) struct Select<'a> {
5960

6061
impl<'a> Select<'a> {
6162
/// Specificity of the first selector in the list of selectors.
63+
#[inline]
6264
pub(crate) fn specificity(&self) -> Specificity {
6365
self.selectors.0[0].specificity()
6466
}

css-inline/src/html/serializer.rs

Lines changed: 136 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
use super::{
2+
attributes::Attributes,
23
document::Document,
34
node::{ElementData, NodeData, NodeId},
45
};
5-
use html5ever::{
6-
local_name, serialize,
7-
serialize::{Serialize, SerializeOpts, Serializer, TraversalScope},
8-
};
6+
use html5ever::{local_name, namespace_url, ns, QualName};
97
use std::{io, io::Write};
108

119
pub(crate) fn serialize_to<W: Write>(
@@ -14,7 +12,8 @@ pub(crate) fn serialize_to<W: Write>(
1412
skip_style_tags: bool,
1513
) -> io::Result<()> {
1614
let sink = Sink::new(document, NodeId::document_id(), skip_style_tags);
17-
serialize(writer, &sink, SerializeOpts::default())
15+
let mut serializer = HtmlSerializer::new(writer);
16+
sink.serialize(&mut serializer)
1817
}
1918

2019
/// Intermediary structure for serializing an HTML document.
@@ -32,46 +31,35 @@ impl<'a> Sink<'a> {
3231
skip_style_tags,
3332
}
3433
}
34+
#[inline]
3535
fn for_node(&self, node: NodeId) -> Sink<'a> {
3636
Sink::new(self.document, node, self.skip_style_tags)
3737
}
38+
#[inline]
3839
fn data(&self) -> &NodeData {
3940
&self.document[self.node].data
4041
}
42+
#[inline]
4143
fn should_skip_element(&self, element: &ElementData) -> bool {
4244
self.skip_style_tags && element.name.local == local_name!("style")
4345
}
44-
fn serialize_children<S: Serializer>(&self, serializer: &mut S) -> io::Result<()> {
46+
fn serialize_children<W: Write>(&self, serializer: &mut HtmlSerializer<W>) -> io::Result<()> {
4547
for child in self.document.children(self.node) {
46-
Serialize::serialize(
47-
&self.for_node(child),
48-
serializer,
49-
TraversalScope::IncludeNode,
50-
)?
48+
self.for_node(child).serialize(serializer)?
5149
}
5250
Ok(())
5351
}
54-
}
55-
56-
impl<'a> Serialize for Sink<'a> {
57-
fn serialize<S: Serializer>(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()> {
52+
fn serialize<W: Write>(&self, serializer: &mut HtmlSerializer<W>) -> io::Result<()> {
5853
match self.data() {
5954
NodeData::Element { element, .. } => {
6055
if self.should_skip_element(element) {
6156
return Ok(());
6257
}
63-
serializer.start_elem(
64-
element.name.clone(),
65-
element
66-
.attributes
67-
.map
68-
.iter()
69-
.map(|(name, value)| (name, &**value)),
70-
)?;
58+
serializer.start_elem(&element.name, &element.attributes)?;
7159

7260
self.serialize_children(serializer)?;
7361

74-
serializer.end_elem(element.name.clone())?;
62+
serializer.end_elem(&element.name)?;
7563
Ok(())
7664
}
7765
NodeData::Document => self.serialize_children(serializer),
@@ -85,6 +73,130 @@ impl<'a> Serialize for Sink<'a> {
8573
}
8674
}
8775

76+
struct ElemInfo {
77+
ignore_children: bool,
78+
}
79+
80+
struct HtmlSerializer<Wr: Write> {
81+
writer: Wr,
82+
stack: Vec<ElemInfo>,
83+
}
84+
85+
impl<Wr: Write> HtmlSerializer<Wr> {
86+
fn new(writer: Wr) -> Self {
87+
HtmlSerializer {
88+
writer,
89+
stack: vec![ElemInfo {
90+
ignore_children: false,
91+
}],
92+
}
93+
}
94+
95+
fn parent(&mut self) -> &mut ElemInfo {
96+
self.stack.last_mut().expect("Stack is empty")
97+
}
98+
99+
fn start_elem(&mut self, name: &QualName, attrs: &Attributes) -> io::Result<()> {
100+
if self.parent().ignore_children {
101+
self.stack.push(ElemInfo {
102+
ignore_children: true,
103+
});
104+
return Ok(());
105+
}
106+
107+
self.writer.write_all(b"<")?;
108+
self.writer.write_all(name.local.as_bytes())?;
109+
for (name, value) in &attrs.map {
110+
self.writer.write_all(b" ")?;
111+
112+
match name.ns {
113+
ns!() => (),
114+
ns!(xml) => self.writer.write_all(b"xml:")?,
115+
ns!(xmlns) => {
116+
if name.local != local_name!("xmlns") {
117+
self.writer.write_all(b"xmlns:")?;
118+
}
119+
}
120+
ns!(xlink) => self.writer.write_all(b"xlink:")?,
121+
_ => {
122+
self.writer.write_all(b"unknown_namespace:")?;
123+
}
124+
}
125+
126+
self.writer.write_all(name.local.as_bytes())?;
127+
self.writer.write_all(b"=\"")?;
128+
self.writer.write_all(value.as_bytes())?;
129+
self.writer.write_all(b"\"")?;
130+
}
131+
self.writer.write_all(b">")?;
132+
133+
let ignore_children = name.ns == ns!(html)
134+
&& matches!(
135+
name.local,
136+
local_name!("area")
137+
| local_name!("base")
138+
| local_name!("basefont")
139+
| local_name!("bgsound")
140+
| local_name!("br")
141+
| local_name!("col")
142+
| local_name!("embed")
143+
| local_name!("frame")
144+
| local_name!("hr")
145+
| local_name!("img")
146+
| local_name!("input")
147+
| local_name!("keygen")
148+
| local_name!("link")
149+
| local_name!("meta")
150+
| local_name!("param")
151+
| local_name!("source")
152+
| local_name!("track")
153+
| local_name!("wbr")
154+
);
155+
156+
self.stack.push(ElemInfo { ignore_children });
157+
158+
Ok(())
159+
}
160+
161+
fn end_elem(&mut self, name: &QualName) -> io::Result<()> {
162+
let info = match self.stack.pop() {
163+
Some(info) => info,
164+
_ => panic!("no ElemInfo"),
165+
};
166+
if info.ignore_children {
167+
return Ok(());
168+
}
169+
170+
self.writer.write_all(b"</")?;
171+
self.writer.write_all(name.local.as_bytes())?;
172+
self.writer.write_all(b">")
173+
}
174+
175+
fn write_text(&mut self, text: &str) -> io::Result<()> {
176+
self.writer.write_all(text.as_bytes())
177+
}
178+
179+
fn write_comment(&mut self, text: &str) -> io::Result<()> {
180+
self.writer.write_all(b"<!--")?;
181+
self.writer.write_all(text.as_bytes())?;
182+
self.writer.write_all(b"-->")
183+
}
184+
185+
fn write_doctype(&mut self, name: &str) -> io::Result<()> {
186+
self.writer.write_all(b"<!DOCTYPE ")?;
187+
self.writer.write_all(name.as_bytes())?;
188+
self.writer.write_all(b">")
189+
}
190+
191+
fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
192+
self.writer.write_all(b"<?")?;
193+
self.writer.write_all(target.as_bytes())?;
194+
self.writer.write_all(b" ")?;
195+
self.writer.write_all(data.as_bytes())?;
196+
self.writer.write_all(b">")
197+
}
198+
}
199+
88200
#[cfg(test)]
89201
mod tests {
90202
use super::Document;

css-inline/tests/test_inlining.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,34 @@ fn font_family_quoted() {
193193
)
194194
}
195195

196+
#[test]
197+
fn href_attribute_unchanged() {
198+
// All HTML attributes should be serialized as is
199+
let html = r#"<html>
200+
<head>
201+
<title>Test</title>
202+
<style>h1 { color:blue; }</style>
203+
</head>
204+
<body>
205+
<h1>Big Text</h1>
206+
<a href="https://example.org/test?a=b&c=d">Link</a>
207+
</body>
208+
</html>"#;
209+
let inlined = inline(html).unwrap();
210+
assert_eq!(
211+
inlined,
212+
r#"<html><head>
213+
<title>Test</title>
214+
<style>h1 { color:blue; }</style>
215+
</head>
216+
<body>
217+
<h1 style="color:blue;">Big Text</h1>
218+
<a href="https://example.org/test?a=b&c=d">Link</a>
219+
220+
</body></html>"#
221+
);
222+
}
223+
196224
#[test]
197225
fn existing_styles() {
198226
// When there is a `style` attribute on a tag that contains a rule

0 commit comments

Comments
 (0)