1
1
use super :: {
2
+ attributes:: Attributes ,
2
3
document:: Document ,
3
4
node:: { ElementData , NodeData , NodeId } ,
4
5
} ;
5
- use html5ever:: {
6
- local_name,
7
- serialize:: { serialize, Serialize , SerializeOpts , Serializer , TraversalScope } ,
8
- } ;
6
+ use html5ever:: { local_name, namespace_url, ns, LocalName , QualName } ;
9
7
use std:: { io, io:: Write } ;
10
8
11
9
pub ( crate ) fn serialize_to < W : Write > (
@@ -20,7 +18,8 @@ pub(crate) fn serialize_to<W: Write>(
20
18
keep_style_tags,
21
19
keep_link_tags,
22
20
) ;
23
- serialize ( writer, & sink, SerializeOpts :: default ( ) )
21
+ let mut ser = HtmlSerializer :: new ( writer) ;
22
+ sink. serialize ( & mut ser)
24
23
}
25
24
26
25
/// Intermediary structure for serializing an HTML document.
@@ -70,37 +69,25 @@ impl<'a> Sink<'a> {
70
69
false
71
70
}
72
71
}
73
- fn serialize_children < S : Serializer > ( & self , serializer : & mut S ) -> io:: Result < ( ) > {
72
+
73
+ fn serialize_children < W : Write > ( & self , serializer : & mut HtmlSerializer < W > ) -> io:: Result < ( ) > {
74
74
for child in self . document . children ( self . node ) {
75
- self . for_node ( child)
76
- . serialize ( serializer, TraversalScope :: IncludeNode ) ?;
75
+ self . for_node ( child) . serialize ( serializer) ?;
77
76
}
78
77
Ok ( ( ) )
79
78
}
80
- }
81
79
82
- impl < ' a > Serialize for Sink < ' a > {
83
- fn serialize < S > ( & self , serializer : & mut S , _: TraversalScope ) -> io:: Result < ( ) >
84
- where
85
- S : Serializer ,
86
- {
80
+ fn serialize < W : Write > ( & self , serializer : & mut HtmlSerializer < W > ) -> io:: Result < ( ) > {
87
81
match self . data ( ) {
88
82
NodeData :: Element { element, .. } => {
89
83
if self . should_skip_element ( element) {
90
84
return Ok ( ( ) ) ;
91
85
}
92
- serializer. start_elem (
93
- element. name . clone ( ) ,
94
- element
95
- . attributes
96
- . map
97
- . iter ( )
98
- . map ( |( key, value) | ( key, & * * value) ) ,
99
- ) ?;
86
+ serializer. start_elem ( & element. name , & element. attributes ) ?;
100
87
101
88
self . serialize_children ( serializer) ?;
102
89
103
- serializer. end_elem ( element. name . clone ( ) ) ?;
90
+ serializer. end_elem ( & element. name ) ?;
104
91
Ok ( ( ) )
105
92
}
106
93
NodeData :: Document => self . serialize_children ( serializer) ,
@@ -114,6 +101,197 @@ impl<'a> Serialize for Sink<'a> {
114
101
}
115
102
}
116
103
104
+ #[ derive( Default ) ]
105
+ struct ElemInfo {
106
+ html_name : Option < LocalName > ,
107
+ ignore_children : bool ,
108
+ }
109
+
110
+ /// Inspired by HTML serializer from `html5ever`
111
+ /// Source: <https://github.com/servo/html5ever/blob/98d3c0cd01471af997cd60849a38da45a9414dfd/html5ever/src/serialize/mod.rs#L77>
112
+ struct HtmlSerializer < Wr : Write > {
113
+ writer : Wr ,
114
+ stack : Vec < ElemInfo > ,
115
+ }
116
+
117
+ impl < W : Write > HtmlSerializer < W > {
118
+ fn new ( writer : W ) -> Self {
119
+ HtmlSerializer {
120
+ writer,
121
+ stack : vec ! [ ElemInfo {
122
+ html_name: None ,
123
+ ignore_children: false ,
124
+ } ] ,
125
+ }
126
+ }
127
+
128
+ fn parent ( & mut self ) -> & mut ElemInfo {
129
+ self . stack . last_mut ( ) . expect ( "no parent ElemInfo" )
130
+ }
131
+
132
+ fn write_escaped ( & mut self , text : & str ) -> io:: Result < ( ) > {
133
+ // UTF-8 characters are maximum 4 bytes wide.
134
+ let mut buffer = [ 0u8 ; 4 ] ;
135
+ for c in text. chars ( ) {
136
+ match c {
137
+ '&' => self . writer . write_all ( b"&" ) ,
138
+ '\u{00A0}' => self . writer . write_all ( b" " ) ,
139
+ '<' => self . writer . write_all ( b"<" ) ,
140
+ '>' => self . writer . write_all ( b">" ) ,
141
+ c => {
142
+ let slice = c. encode_utf8 ( & mut buffer) ;
143
+ self . writer . write_all ( slice. as_bytes ( ) )
144
+ }
145
+ } ?;
146
+ }
147
+ Ok ( ( ) )
148
+ }
149
+
150
+ fn write_attributes ( & mut self , text : & str ) -> io:: Result < ( ) > {
151
+ // UTF-8 characters are maximum 4 bytes wide.
152
+ let mut buffer = [ 0u8 ; 4 ] ;
153
+ for c in text. chars ( ) {
154
+ match c {
155
+ '&' => self . writer . write_all ( b"&" ) ,
156
+ '\u{00A0}' => self . writer . write_all ( b" " ) ,
157
+ '"' => self . writer . write_all ( b""" ) ,
158
+ c => {
159
+ let slice = c. encode_utf8 ( & mut buffer) ;
160
+ self . writer . write_all ( slice. as_bytes ( ) )
161
+ }
162
+ } ?;
163
+ }
164
+ Ok ( ( ) )
165
+ }
166
+
167
+ fn start_elem ( & mut self , name : & QualName , attrs : & Attributes ) -> io:: Result < ( ) > {
168
+ let html_name = match name. ns {
169
+ ns ! ( html) => Some ( name. local . clone ( ) ) ,
170
+ _ => None ,
171
+ } ;
172
+
173
+ if self . parent ( ) . ignore_children {
174
+ self . stack . push ( ElemInfo {
175
+ html_name,
176
+ ignore_children : true ,
177
+ } ) ;
178
+ return Ok ( ( ) ) ;
179
+ }
180
+
181
+ self . writer . write_all ( b"<" ) ?;
182
+ self . writer . write_all ( name. local . as_bytes ( ) ) ?;
183
+ for ( name, value) in & attrs. map {
184
+ self . writer . write_all ( b" " ) ?;
185
+
186
+ match name. ns {
187
+ ns ! ( ) => ( ) ,
188
+ ns ! ( xml) => self . writer . write_all ( b"xml:" ) ?,
189
+ ns ! ( xmlns) => {
190
+ if name. local != local_name ! ( "xmlns" ) {
191
+ self . writer . write_all ( b"xmlns:" ) ?;
192
+ }
193
+ }
194
+ ns ! ( xlink) => self . writer . write_all ( b"xlink:" ) ?,
195
+ _ => {
196
+ self . writer . write_all ( b"unknown_namespace:" ) ?;
197
+ }
198
+ }
199
+
200
+ self . writer . write_all ( name. local . as_bytes ( ) ) ?;
201
+ self . writer . write_all ( b"=\" " ) ?;
202
+ self . write_attributes ( value) ?;
203
+ self . writer . write_all ( b"\" " ) ?;
204
+ }
205
+ self . writer . write_all ( b">" ) ?;
206
+
207
+ let ignore_children = name. ns == ns ! ( html)
208
+ && matches ! (
209
+ name. local,
210
+ local_name!( "area" )
211
+ | local_name!( "base" )
212
+ | local_name!( "basefont" )
213
+ | local_name!( "bgsound" )
214
+ | local_name!( "br" )
215
+ | local_name!( "col" )
216
+ | local_name!( "embed" )
217
+ | local_name!( "frame" )
218
+ | local_name!( "hr" )
219
+ | local_name!( "img" )
220
+ | local_name!( "input" )
221
+ | local_name!( "keygen" )
222
+ | local_name!( "link" )
223
+ | local_name!( "meta" )
224
+ | local_name!( "param" )
225
+ | local_name!( "source" )
226
+ | local_name!( "track" )
227
+ | local_name!( "wbr" )
228
+ ) ;
229
+
230
+ self . stack . push ( ElemInfo {
231
+ html_name,
232
+ ignore_children,
233
+ } ) ;
234
+
235
+ Ok ( ( ) )
236
+ }
237
+
238
+ fn end_elem ( & mut self , name : & QualName ) -> io:: Result < ( ) > {
239
+ let info = match self . stack . pop ( ) {
240
+ Some ( info) => info,
241
+ _ => panic ! ( "no ElemInfo" ) ,
242
+ } ;
243
+ if info. ignore_children {
244
+ return Ok ( ( ) ) ;
245
+ }
246
+
247
+ self . writer . write_all ( b"</" ) ?;
248
+ self . writer . write_all ( name. local . as_bytes ( ) ) ?;
249
+ self . writer . write_all ( b">" )
250
+ }
251
+
252
+ fn write_text ( & mut self , text : & str ) -> io:: Result < ( ) > {
253
+ let escape = !matches ! (
254
+ self . parent( ) . html_name,
255
+ Some (
256
+ local_name!( "style" )
257
+ | local_name!( "script" )
258
+ | local_name!( "xmp" )
259
+ | local_name!( "iframe" )
260
+ | local_name!( "noembed" )
261
+ | local_name!( "noframes" )
262
+ | local_name!( "plaintext" )
263
+ | local_name!( "noscript" )
264
+ ) ,
265
+ ) ;
266
+
267
+ if escape {
268
+ self . write_escaped ( text)
269
+ } else {
270
+ self . writer . write_all ( text. as_bytes ( ) )
271
+ }
272
+ }
273
+
274
+ fn write_comment ( & mut self , text : & str ) -> io:: Result < ( ) > {
275
+ self . writer . write_all ( b"<!--" ) ?;
276
+ self . writer . write_all ( text. as_bytes ( ) ) ?;
277
+ self . writer . write_all ( b"-->" )
278
+ }
279
+
280
+ fn write_doctype ( & mut self , name : & str ) -> io:: Result < ( ) > {
281
+ self . writer . write_all ( b"<!DOCTYPE " ) ?;
282
+ self . writer . write_all ( name. as_bytes ( ) ) ?;
283
+ self . writer . write_all ( b">" )
284
+ }
285
+
286
+ fn write_processing_instruction ( & mut self , target : & str , data : & str ) -> io:: Result < ( ) > {
287
+ self . writer . write_all ( b"<?" ) ?;
288
+ self . writer . write_all ( target. as_bytes ( ) ) ?;
289
+ self . writer . write_all ( b" " ) ?;
290
+ self . writer . write_all ( data. as_bytes ( ) ) ?;
291
+ self . writer . write_all ( b">" )
292
+ }
293
+ }
294
+
117
295
#[ cfg( test) ]
118
296
mod tests {
119
297
use super :: Document ;
0 commit comments