Skip to content

Commit 84a8ea8

Browse files
author
bors-servo
authored
Auto merge of #293 - jechas01:no_stack_panics, r=jdm
Don't bail on empty Serializer stack. This prevents panics if `end_elem` or `parent` is called with an empty stack by fabricating a new default `ElemInfo` (when the new `create_missing_parent` option is enabled; the default behavior is still to panic). Normally, this will never happen with valid HTML, but in the event that someone needs to serialize a token stream representing a malformed tree, they can now do so. Fixes #292.
2 parents 6e5ef0d + 703d141 commit 84a8ea8

File tree

2 files changed

+126
-12
lines changed

2 files changed

+126
-12
lines changed

html5ever/src/serialize/mod.rs

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,26 @@ pub struct SerializeOpts {
2626

2727
/// Serialize the root node? Default: ChildrenOnly
2828
pub traversal_scope: TraversalScope,
29+
30+
/// If the serializer is asked to serialize an invalid tree, the default
31+
/// behavior is to panic in the event that an `end_elem` is created without a
32+
/// matching `start_elem`. Setting this to true will prevent those panics by
33+
/// creating a default parent on the element stack. No extra start elem will
34+
/// actually be written. Default: false
35+
pub create_missing_parent: bool,
2936
}
3037

3138
impl Default for SerializeOpts {
3239
fn default() -> SerializeOpts {
3340
SerializeOpts {
3441
scripting_enabled: true,
3542
traversal_scope: TraversalScope::ChildrenOnly,
43+
create_missing_parent: false,
3644
}
3745
}
3846
}
3947

48+
#[derive(Default)]
4049
struct ElemInfo {
4150
html_name: Option<LocalName>,
4251
ignore_children: bool,
@@ -66,16 +75,20 @@ impl<Wr: Write> HtmlSerializer<Wr> {
6675
HtmlSerializer {
6776
writer: writer,
6877
opts: opts,
69-
stack: vec!(ElemInfo {
70-
html_name: None,
71-
ignore_children: false,
72-
processed_first_child: false,
73-
}),
78+
stack: vec![Default::default()],
7479
}
7580
}
7681

7782
fn parent(&mut self) -> &mut ElemInfo {
78-
self.stack.last_mut().expect("no parent ElemInfo")
83+
if self.stack.len() == 0 {
84+
if self.opts.create_missing_parent {
85+
warn!("ElemInfo stack empty, creating new parent");
86+
self.stack.push(Default::default());
87+
} else {
88+
panic!("no parent ElemInfo")
89+
}
90+
}
91+
self.stack.last_mut().unwrap()
7992
}
8093

8194
fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
@@ -159,7 +172,14 @@ impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
159172
}
160173

161174
fn end_elem(&mut self, name: QualName) -> io::Result<()> {
162-
let info = self.stack.pop().expect("no ElemInfo");
175+
let info = match self.stack.pop() {
176+
Some(info) => info,
177+
None if self.opts.create_missing_parent => {
178+
warn!("missing ElemInfo, creating default.");
179+
Default::default()
180+
}
181+
_ => panic!("no ElemInfo"),
182+
};
163183
if info.ignore_children {
164184
return Ok(());
165185
}

html5ever/tests/serializer.rs

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,84 @@ use html5ever::{parse_fragment, parse_document, serialize, QualName};
1515
use html5ever::driver::ParseOpts;
1616
use html5ever::rcdom::RcDom;
1717
use html5ever::tendril::{StrTendril, SliceExt, TendrilSink};
18+
use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, TagKind, Tokenizer};
19+
use html5ever::serialize::{Serialize, Serializer, TraversalScope, SerializeOpts};
20+
21+
use std::io;
22+
23+
struct Tokens(Vec<Token>);
24+
25+
impl TokenSink for Tokens {
26+
type Handle = ();
27+
28+
fn process_token(&mut self, token: Token, _: u64) -> TokenSinkResult<()> {
29+
self.0.push(token);
30+
TokenSinkResult::Continue
31+
}
32+
}
33+
34+
impl Serialize for Tokens {
35+
fn serialize<S>(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()>
36+
where
37+
S: Serializer,
38+
{
39+
for t in self.0.iter() {
40+
match t { // Forward each token to the corresponding `Serializer` method.
41+
&Token::TagToken(ref tag) => {
42+
let name = QualName::new(
43+
None,
44+
"http://www.w3.org/1999/xhtml".into(),
45+
tag.name.as_ref().into(),
46+
);
47+
match tag.kind {
48+
TagKind::StartTag => {
49+
serializer.start_elem(
50+
name,
51+
tag.attrs.iter().map(
52+
|at| (&at.name, &at.value[..]),
53+
),
54+
)?
55+
}
56+
TagKind::EndTag => serializer.end_elem(name)?,
57+
}
58+
}
59+
&Token::DoctypeToken(ref dt) => {
60+
match dt.name {
61+
Some(ref name) => serializer.write_doctype(&name)?,
62+
None => {}
63+
}
64+
}
65+
&Token::CommentToken(ref chars) => serializer.write_comment(&chars)?,
66+
&Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?,
67+
&Token::NullCharacterToken |
68+
&Token::EOFToken => {}
69+
&Token::ParseError(ref e) => println!("parse error: {:#?}", e),
70+
}
71+
}
72+
Ok(())
73+
}
74+
}
75+
76+
fn tokenize_and_serialize(input: StrTendril) -> StrTendril {
77+
let mut input = {
78+
let mut q = ::html5ever::tokenizer::BufferQueue::new();
79+
q.push_front(input.into());
80+
q
81+
};
82+
let mut tokenizer = Tokenizer::new(Tokens(vec![]), Default::default());
83+
tokenizer.feed(&mut input);
84+
tokenizer.end();
85+
let mut output = ::std::io::Cursor::new(vec![]);
86+
serialize(
87+
&mut output,
88+
&tokenizer.sink,
89+
SerializeOpts {
90+
create_missing_parent: true,
91+
..Default::default()
92+
},
93+
).unwrap();
94+
StrTendril::try_from_byte_slice(&output.into_inner()).unwrap()
95+
}
1896

1997
fn parse_and_serialize(input: StrTendril) -> StrTendril {
2098
let dom = parse_fragment(
@@ -28,20 +106,34 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
28106
StrTendril::try_from_byte_slice(&result).unwrap()
29107
}
30108

31-
macro_rules! test {
32-
($name:ident, $input:expr, $output:expr) => {
109+
macro_rules! test_fn {
110+
($f:ident, $name:ident, $input:expr, $output:expr) => {
33111
#[test]
34112
fn $name() {
35-
assert_eq!($output, &*parse_and_serialize($input.to_tendril()));
113+
assert_eq!($output, &*$f($input.to_tendril()));
36114
}
37115
};
38116

39117
// Shorthand for $output = $input
40-
($name:ident, $input:expr) => {
41-
test!($name, $input, $input);
118+
($f:ident, $name:ident, $input:expr) => {
119+
test_fn!($f, $name, $input, $input);
120+
};
121+
}
122+
123+
macro_rules! test {
124+
($($t:tt)*) => {
125+
test_fn!(parse_and_serialize, $($t)*);
42126
};
43127
}
44128

129+
macro_rules! test_no_parse {
130+
($($t:tt)*) => {
131+
test_fn!(tokenize_and_serialize, $($t)*);
132+
};
133+
}
134+
135+
136+
45137
test!(empty, r#""#);
46138
test!(smoke_test, r#"<p><i>Hello</i>, World!</p>"#);
47139

@@ -96,6 +188,8 @@ test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
96188
test!(attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"#);
97189
test!(attr_ns_4, r#"<svg xlink:href="bleh"></svg>"#);
98190

191+
test_no_parse!(malformed_tokens, r#"foo</div><div>"#);
192+
99193
#[test]
100194
fn doctype() {
101195
let dom = parse_document(

0 commit comments

Comments
 (0)