Skip to content

Commit e43440b

Browse files
committed
Add test for malformed token stream serialization
1 parent 6e5ef0d commit e43440b

File tree

1 file changed

+92
-5
lines changed

1 file changed

+92
-5
lines changed

html5ever/tests/serializer.rs

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,77 @@ use html5ever::{parse_fragment, parse_document, serialize, QualName};
1515
use html5ever::driver::ParseOpts;
1616
use html5ever::rcdom::RcDom;
1717
use html5ever::tendril::{StrTendril, SliceExt, TendrilSink};
18+
use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, TagKind, Tokenizer};
19+
use html5ever::serialize::{Serialize, Serializer, TraversalScope};
20+
21+
use std::io;
22+
23+
/// Token sink that stores every token it is handed, in arrival order, in the
/// wrapped `Vec`.
struct Tokens(Vec<Token>);
24+
25+
impl TokenSink for Tokens {
26+
type Handle = ();
27+
28+
fn process_token(&mut self, token: Token, _: u64) -> TokenSinkResult<()> {
29+
self.0.push(token);
30+
TokenSinkResult::Continue
31+
}
32+
}
33+
34+
impl Serialize for Tokens {
35+
fn serialize<S>(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()>
36+
where
37+
S: Serializer,
38+
{
39+
for t in self.0.iter() {
40+
match t { // TODO: check whether this is an IE conditional comment or a spec comment
41+
&Token::TagToken(ref tag) => {
42+
let name = QualName::new(
43+
None,
44+
"http://www.w3.org/1999/xhtml".into(),
45+
tag.name.as_ref().into(),
46+
);
47+
match tag.kind {
48+
TagKind::StartTag => {
49+
serializer.start_elem(
50+
name,
51+
tag.attrs.iter().map(
52+
|at| (&at.name, &at.value[..]),
53+
),
54+
)?
55+
}
56+
TagKind::EndTag => serializer.end_elem(name)?,
57+
}
58+
}
59+
&Token::DoctypeToken(ref dt) => {
60+
match dt.name {
61+
Some(ref name) => serializer.write_doctype(&name)?,
62+
None => {}
63+
}
64+
}
65+
&Token::CommentToken(ref chars) => serializer.write_comment(&chars)?,
66+
&Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?,
67+
&Token::NullCharacterToken |
68+
&Token::EOFToken => {}
69+
&Token::ParseError(ref e) => println!("parse error: {:#?}", e),
70+
}
71+
}
72+
Ok(())
73+
}
74+
}
75+
76+
fn tokenize_and_serialize(input: StrTendril) -> StrTendril {
77+
let mut input = {
78+
let mut q = ::html5ever::tokenizer::BufferQueue::new();
79+
q.push_front(input.into());
80+
q
81+
};
82+
let mut tokenizer = Tokenizer::new(Tokens(vec![]), Default::default());
83+
tokenizer.feed(&mut input);
84+
tokenizer.end();
85+
let mut output = ::std::io::Cursor::new(vec![]);
86+
serialize(&mut output, &tokenizer.sink, Default::default()).unwrap();
87+
StrTendril::try_from_byte_slice(&output.into_inner()).unwrap()
88+
}
1889

1990
fn parse_and_serialize(input: StrTendril) -> StrTendril {
2091
let dom = parse_fragment(
@@ -28,20 +99,34 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
2899
StrTendril::try_from_byte_slice(&result).unwrap()
29100
}
30101

31-
macro_rules! test {
32-
($name:ident, $input:expr, $output:expr) => {
102+
macro_rules! test_fn {
103+
($f:ident, $name:ident, $input:expr, $output:expr) => {
33104
#[test]
34105
fn $name() {
35-
assert_eq!($output, &*parse_and_serialize($input.to_tendril()));
106+
assert_eq!($output, &*$f($input.to_tendril()));
36107
}
37108
};
38109

39110
// Shorthand for $output = $input
40-
($name:ident, $input:expr) => {
41-
test!($name, $input, $input);
111+
($f:ident, $name:ident, $input:expr) => {
112+
test_fn!($f, $name, $input, $input);
113+
};
114+
}
115+
116+
/// Declares a `#[test]` that checks its input via `parse_and_serialize`.
///
/// All arguments are forwarded unchanged to `test_fn!`.
macro_rules! test {
    ($($args:tt)*) => {
        test_fn!(parse_and_serialize, $($args)*);
    };
}
44121

122+
/// Declares a `#[test]` that checks its input via `tokenize_and_serialize`,
/// i.e. without running the tree builder.
///
/// All arguments are forwarded unchanged to `test_fn!`.
macro_rules! test_no_parse {
    ($($args:tt)*) => {
        test_fn!(tokenize_and_serialize, $($args)*);
    };
}
127+
128+
129+
45130
test!(empty, r#""#);
46131
test!(smoke_test, r#"<p><i>Hello</i>, World!</p>"#);
47132

@@ -96,6 +181,8 @@ test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
96181
test!(attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"#);
97182
test!(attr_ns_4, r#"<svg xlink:href="bleh"></svg>"#);
98183

184+
test_no_parse!(malformed_tokens, r#"foo</div><div>"#);
185+
99186
#[test]
100187
fn doctype() {
101188
let dom = parse_document(

0 commit comments

Comments
 (0)