@@ -15,6 +15,77 @@ use html5ever::{parse_fragment, parse_document, serialize, QualName};
15
15
use html5ever:: driver:: ParseOpts ;
16
16
use html5ever:: rcdom:: RcDom ;
17
17
use html5ever:: tendril:: { StrTendril , SliceExt , TendrilSink } ;
18
+ use html5ever:: tokenizer:: { Token , TokenSink , TokenSinkResult , TagKind , Tokenizer } ;
19
+ use html5ever:: serialize:: { Serialize , Serializer , TraversalScope } ;
20
+
21
+ use std:: io;
22
+
23
+ struct Tokens ( Vec < Token > ) ;
24
+
25
+ impl TokenSink for Tokens {
26
+ type Handle = ( ) ;
27
+
28
+ fn process_token ( & mut self , token : Token , _: u64 ) -> TokenSinkResult < ( ) > {
29
+ self . 0 . push ( token) ;
30
+ TokenSinkResult :: Continue
31
+ }
32
+ }
33
+
34
+ impl Serialize for Tokens {
35
+ fn serialize < S > ( & self , serializer : & mut S , _: TraversalScope ) -> io:: Result < ( ) >
36
+ where
37
+ S : Serializer ,
38
+ {
39
+ for t in self . 0 . iter ( ) {
40
+ match t { // TODO: check whether this is an IE conditional comment or a spec comment
41
+ & Token :: TagToken ( ref tag) => {
42
+ let name = QualName :: new (
43
+ None ,
44
+ "http://www.w3.org/1999/xhtml" . into ( ) ,
45
+ tag. name . as_ref ( ) . into ( ) ,
46
+ ) ;
47
+ match tag. kind {
48
+ TagKind :: StartTag => {
49
+ serializer. start_elem (
50
+ name,
51
+ tag. attrs . iter ( ) . map (
52
+ |at| ( & at. name , & at. value [ ..] ) ,
53
+ ) ,
54
+ ) ?
55
+ }
56
+ TagKind :: EndTag => serializer. end_elem ( name) ?,
57
+ }
58
+ }
59
+ & Token :: DoctypeToken ( ref dt) => {
60
+ match dt. name {
61
+ Some ( ref name) => serializer. write_doctype ( & name) ?,
62
+ None => { }
63
+ }
64
+ }
65
+ & Token :: CommentToken ( ref chars) => serializer. write_comment ( & chars) ?,
66
+ & Token :: CharacterTokens ( ref chars) => serializer. write_text ( & chars) ?,
67
+ & Token :: NullCharacterToken |
68
+ & Token :: EOFToken => { }
69
+ & Token :: ParseError ( ref e) => println ! ( "parse error: {:#?}" , e) ,
70
+ }
71
+ }
72
+ Ok ( ( ) )
73
+ }
74
+ }
75
+
76
+ fn tokenize_and_serialize ( input : StrTendril ) -> StrTendril {
77
+ let mut input = {
78
+ let mut q = :: html5ever:: tokenizer:: BufferQueue :: new ( ) ;
79
+ q. push_front ( input. into ( ) ) ;
80
+ q
81
+ } ;
82
+ let mut tokenizer = Tokenizer :: new ( Tokens ( vec ! [ ] ) , Default :: default ( ) ) ;
83
+ tokenizer. feed ( & mut input) ;
84
+ tokenizer. end ( ) ;
85
+ let mut output = :: std:: io:: Cursor :: new ( vec ! [ ] ) ;
86
+ serialize ( & mut output, & tokenizer. sink , Default :: default ( ) ) . unwrap ( ) ;
87
+ StrTendril :: try_from_byte_slice ( & output. into_inner ( ) ) . unwrap ( )
88
+ }
18
89
19
90
fn parse_and_serialize ( input : StrTendril ) -> StrTendril {
20
91
let dom = parse_fragment (
@@ -28,20 +99,34 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
28
99
StrTendril :: try_from_byte_slice ( & result) . unwrap ( )
29
100
}
30
101
31
- macro_rules! test {
32
- ( $name: ident, $input: expr, $output: expr) => {
102
// Generates a `#[test]` function called `$test_name` that converts `$source`
// with `to_tendril()`, passes it to the function `$func`, and asserts that the
// result dereferences to `$expected`.
macro_rules! test_fn {
    // Full form: the expected output is given explicitly.
    ($func:ident, $test_name:ident, $source:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            assert_eq!($expected, &*$func($source.to_tendril()));
        }
    };

    // Shorthand for $expected = $source
    ($func:ident, $test_name:ident, $source:expr) => {
        test_fn!($func, $test_name, $source, $source);
    };
}
115
+
116
// Declares a round-trip test that runs its input through `parse_and_serialize`.
// All arguments are forwarded untouched to `test_fn!`.
macro_rules! test {
    ($($args:tt)*) => {
        test_fn!(parse_and_serialize, $($args)*);
    };
}
44
121
122
// Declares a round-trip test that runs its input through
// `tokenize_and_serialize`. All arguments are forwarded untouched to `test_fn!`.
macro_rules! test_no_parse {
    ($($args:tt)*) => {
        test_fn!(tokenize_and_serialize, $($args)*);
    };
}
127
+
128
+
129
+
45
130
test ! ( empty, r#""# ) ;
46
131
test ! ( smoke_test, r#"<p><i>Hello</i>, World!</p>"# ) ;
47
132
@@ -96,6 +181,8 @@ test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
96
181
test ! ( attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"# ) ;
97
182
test ! ( attr_ns_4, r#"<svg xlink:href="bleh"></svg>"# ) ;
98
183
184
// The input has an end tag with no matching start tag followed by a start tag
// that is never closed. No expected output is given, so the result of
// `tokenize_and_serialize` must equal the input exactly.
+ test_no_parse ! ( malformed_tokens, r#"foo</div><div>"# ) ;
185
+
99
186
#[ test]
100
187
fn doctype ( ) {
101
188
let dom = parse_document (
0 commit comments