@@ -15,6 +15,84 @@ use html5ever::{parse_fragment, parse_document, serialize, QualName};
15
15
use html5ever:: driver:: ParseOpts ;
16
16
use html5ever:: rcdom:: RcDom ;
17
17
use html5ever:: tendril:: { StrTendril , SliceExt , TendrilSink } ;
18
+ use html5ever:: tokenizer:: { Token , TokenSink , TokenSinkResult , TagKind , Tokenizer } ;
19
+ use html5ever:: serialize:: { Serialize , Serializer , TraversalScope , SerializeOpts } ;
20
+
21
+ use std:: io;
22
+
23
+ struct Tokens ( Vec < Token > ) ;
24
+
25
+ impl TokenSink for Tokens {
26
+ type Handle = ( ) ;
27
+
28
+ fn process_token ( & mut self , token : Token , _: u64 ) -> TokenSinkResult < ( ) > {
29
+ self . 0 . push ( token) ;
30
+ TokenSinkResult :: Continue
31
+ }
32
+ }
33
+
34
+ impl Serialize for Tokens {
35
+ fn serialize < S > ( & self , serializer : & mut S , _: TraversalScope ) -> io:: Result < ( ) >
36
+ where
37
+ S : Serializer ,
38
+ {
39
+ for t in self . 0 . iter ( ) {
40
+ match t { // TODO: check whether this is an IE conditional comment or a spec comment
41
+ & Token :: TagToken ( ref tag) => {
42
+ let name = QualName :: new (
43
+ None ,
44
+ "http://www.w3.org/1999/xhtml" . into ( ) ,
45
+ tag. name . as_ref ( ) . into ( ) ,
46
+ ) ;
47
+ match tag. kind {
48
+ TagKind :: StartTag => {
49
+ serializer. start_elem (
50
+ name,
51
+ tag. attrs . iter ( ) . map (
52
+ |at| ( & at. name , & at. value [ ..] ) ,
53
+ ) ,
54
+ ) ?
55
+ }
56
+ TagKind :: EndTag => serializer. end_elem ( name) ?,
57
+ }
58
+ }
59
+ & Token :: DoctypeToken ( ref dt) => {
60
+ match dt. name {
61
+ Some ( ref name) => serializer. write_doctype ( & name) ?,
62
+ None => { }
63
+ }
64
+ }
65
+ & Token :: CommentToken ( ref chars) => serializer. write_comment ( & chars) ?,
66
+ & Token :: CharacterTokens ( ref chars) => serializer. write_text ( & chars) ?,
67
+ & Token :: NullCharacterToken |
68
+ & Token :: EOFToken => { }
69
+ & Token :: ParseError ( ref e) => println ! ( "parse error: {:#?}" , e) ,
70
+ }
71
+ }
72
+ Ok ( ( ) )
73
+ }
74
+ }
75
+
76
+ fn tokenize_and_serialize ( input : StrTendril ) -> StrTendril {
77
+ let mut input = {
78
+ let mut q = :: html5ever:: tokenizer:: BufferQueue :: new ( ) ;
79
+ q. push_front ( input. into ( ) ) ;
80
+ q
81
+ } ;
82
+ let mut tokenizer = Tokenizer :: new ( Tokens ( vec ! [ ] ) , Default :: default ( ) ) ;
83
+ tokenizer. feed ( & mut input) ;
84
+ tokenizer. end ( ) ;
85
+ let mut output = :: std:: io:: Cursor :: new ( vec ! [ ] ) ;
86
+ serialize (
87
+ & mut output,
88
+ & tokenizer. sink ,
89
+ SerializeOpts {
90
+ create_missing_parent : true ,
91
+ ..Default :: default ( )
92
+ } ,
93
+ ) . unwrap ( ) ;
94
+ StrTendril :: try_from_byte_slice ( & output. into_inner ( ) ) . unwrap ( )
95
+ }
18
96
19
97
fn parse_and_serialize ( input : StrTendril ) -> StrTendril {
20
98
let dom = parse_fragment (
@@ -28,20 +106,34 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
28
106
StrTendril :: try_from_byte_slice ( & result) . unwrap ( )
29
107
}
30
108
31
// Generates a `#[test]` function called `$test_name` which passes the input
// (converted with `to_tendril()`) to the function `$func` and asserts that the
// result equals the expected output.
macro_rules! test_fn {
    // Full form: the expected output is given explicitly.
    ($func:ident, $test_name:ident, $source:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            assert_eq!($expected, &*$func($source.to_tendril()));
        }
    };

    // Shorthand for $output = $input
    ($func:ident, $test_name:ident, $source:expr) => {
        test_fn!($func, $test_name, $source, $source);
    };
}
122
+
123
// Declares a test that runs its input through `parse_and_serialize`.
// All arguments are forwarded untouched to `test_fn!`.
macro_rules! test {
    ($($args:tt)*) => {
        test_fn!(parse_and_serialize, $($args)*);
    };
}
44
128
129
// Declares a test that runs its input through `tokenize_and_serialize`.
// All arguments are forwarded untouched to `test_fn!`.
macro_rules! test_no_parse {
    ($($args:tt)*) => {
        test_fn!(tokenize_and_serialize, $($args)*);
    };
}
134
+
135
+
136
+
45
137
test ! ( empty, r#""# ) ;
46
138
test ! ( smoke_test, r#"<p><i>Hello</i>, World!</p>"# ) ;
47
139
@@ -96,6 +188,8 @@ test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
96
188
test ! ( attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"# ) ;
97
189
test ! ( attr_ns_4, r#"<svg xlink:href="bleh"></svg>"# ) ;
98
190
191
+ test_no_parse ! ( malformed_tokens, r#"foo</div><div>"# ) ;
192
+
99
193
#[ test]
100
194
fn doctype ( ) {
101
195
let dom = parse_document (
0 commit comments