@@ -14,6 +14,7 @@ use std::borrow::Cow::Borrowed;
14
14
use super :: { XmlTokenizer , TokenSink } ;
15
15
use tendril:: StrTendril ;
16
16
use util:: { is_ascii_alnum} ;
17
+ use markup5ever:: buffer_queue:: BufferQueue ;
17
18
18
19
pub use self :: Status :: * ;
19
20
use self :: State :: * ;
@@ -112,31 +113,35 @@ impl CharRefTokenizer {
112
113
}
113
114
114
115
impl CharRefTokenizer {
115
- pub fn step < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
116
+ pub fn step < Sink : TokenSink > (
117
+ & mut self ,
118
+ tokenizer : & mut XmlTokenizer < Sink > ,
119
+ input : & mut BufferQueue )
120
+ -> Status {
116
121
if self . result . is_some ( ) {
117
122
return Done ;
118
123
}
119
124
120
125
debug ! ( "char ref tokenizer stepping in state {:?}" , self . state) ;
121
126
match self . state {
122
- Begin => self . do_begin ( tokenizer) ,
123
- Octothorpe => self . do_octothorpe ( tokenizer) ,
124
- Numeric ( base) => self . do_numeric ( tokenizer, base) ,
125
- NumericSemicolon => self . do_numeric_semicolon ( tokenizer) ,
126
- Named => self . do_named ( tokenizer) ,
127
- BogusName => self . do_bogus_name ( tokenizer) ,
127
+ Begin => self . do_begin ( tokenizer, input ) ,
128
+ Octothorpe => self . do_octothorpe ( tokenizer, input ) ,
129
+ Numeric ( base) => self . do_numeric ( tokenizer, base, input ) ,
130
+ NumericSemicolon => self . do_numeric_semicolon ( tokenizer, input ) ,
131
+ Named => self . do_named ( tokenizer, input ) ,
132
+ BogusName => self . do_bogus_name ( tokenizer, input ) ,
128
133
}
129
134
}
130
135
131
- fn do_begin < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
132
- match unwrap_or_return ! ( tokenizer. peek( ) , Stuck ) {
136
+ fn do_begin < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
137
+ match unwrap_or_return ! ( tokenizer. peek( input ) , Stuck ) {
133
138
'\t' | '\n' | '\x0C' | ' ' | '<' | '&'
134
139
=> self . finish_none ( ) ,
135
140
c if Some ( c) == self . addnl_allowed
136
141
=> self . finish_none ( ) ,
137
142
138
143
'#' => {
139
- tokenizer. discard_char ( ) ;
144
+ tokenizer. discard_char ( input ) ;
140
145
self . state = Octothorpe ;
141
146
Progress
142
147
}
@@ -149,11 +154,11 @@ impl CharRefTokenizer {
149
154
}
150
155
}
151
156
152
- fn do_octothorpe < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
153
- let c = unwrap_or_return ! ( tokenizer. peek( ) , Stuck ) ;
157
+ fn do_octothorpe < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
158
+ let c = unwrap_or_return ! ( tokenizer. peek( input ) , Stuck ) ;
154
159
match c {
155
160
'x' | 'X' => {
156
- tokenizer. discard_char ( ) ;
161
+ tokenizer. discard_char ( input ) ;
157
162
self . hex_marker = Some ( c) ;
158
163
self . state = Numeric ( 16 ) ;
159
164
}
@@ -166,11 +171,11 @@ impl CharRefTokenizer {
166
171
Progress
167
172
}
168
173
169
- fn do_numeric < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , base : u32 ) -> Status {
170
- let c = unwrap_or_return ! ( tokenizer. peek( ) , Stuck ) ;
174
+ fn do_numeric < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , base : u32 , input : & mut BufferQueue ) -> Status {
175
+ let c = unwrap_or_return ! ( tokenizer. peek( input ) , Stuck ) ;
171
176
match c. to_digit ( base) {
172
177
Some ( n) => {
173
- tokenizer. discard_char ( ) ;
178
+ tokenizer. discard_char ( input ) ;
174
179
self . num = self . num . wrapping_mul ( base) ;
175
180
if self . num > 0x10FFFF {
176
181
// We might overflow, and the character is definitely invalid.
@@ -182,7 +187,7 @@ impl CharRefTokenizer {
182
187
Progress
183
188
}
184
189
185
- None if !self . seen_digit => self . unconsume_numeric ( tokenizer) ,
190
+ None if !self . seen_digit => self . unconsume_numeric ( tokenizer, input ) ,
186
191
187
192
None => {
188
193
self . state = NumericSemicolon ;
@@ -191,22 +196,22 @@ impl CharRefTokenizer {
191
196
}
192
197
}
193
198
194
- fn do_numeric_semicolon < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
195
- match unwrap_or_return ! ( tokenizer. peek( ) , Stuck ) {
196
- ';' => tokenizer. discard_char ( ) ,
199
+ fn do_numeric_semicolon < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
200
+ match unwrap_or_return ! ( tokenizer. peek( input ) , Stuck ) {
201
+ ';' => tokenizer. discard_char ( input ) ,
197
202
_ => tokenizer. emit_error ( Borrowed ( "Semicolon missing after numeric character reference" ) ) ,
198
203
} ;
199
204
self . finish_numeric ( tokenizer)
200
205
}
201
206
202
- fn unconsume_numeric < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
207
+ fn unconsume_numeric < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
203
208
let mut unconsume = StrTendril :: from_char ( '#' ) ;
204
209
match self . hex_marker {
205
210
Some ( c) => unconsume. push_char ( c) ,
206
211
None => ( ) ,
207
212
}
208
213
209
- tokenizer. unconsume ( unconsume) ;
214
+ tokenizer. unconsume ( input , unconsume) ;
210
215
tokenizer. emit_error ( Borrowed ( "Numeric character reference without digits" ) ) ;
211
216
self . finish_none ( )
212
217
}
@@ -244,8 +249,8 @@ impl CharRefTokenizer {
244
249
self . finish_one ( c)
245
250
}
246
251
247
- fn do_named < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
248
- let c = unwrap_or_return ! ( tokenizer. get_char( ) , Stuck ) ;
252
+ fn do_named < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
253
+ let c = unwrap_or_return ! ( tokenizer. get_char( input ) , Stuck ) ;
249
254
self . name_buf_mut ( ) . push_char ( c) ;
250
255
match data:: NAMED_ENTITIES . get ( & self . name_buf ( ) [ ..] ) {
251
256
// We have either a full match or a prefix of one.
@@ -260,7 +265,7 @@ impl CharRefTokenizer {
260
265
}
261
266
262
267
// Can't continue the match.
263
- None => self . finish_named ( tokenizer, Some ( c) ) ,
268
+ None => self . finish_named ( tokenizer, Some ( c) , input ) ,
264
269
}
265
270
}
266
271
@@ -271,13 +276,14 @@ impl CharRefTokenizer {
271
276
tokenizer. emit_error ( msg) ;
272
277
}
273
278
274
- fn unconsume_name < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) {
275
- tokenizer. unconsume ( self . name_buf_opt . take ( ) . unwrap ( ) ) ;
279
+ fn unconsume_name < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) {
280
+ tokenizer. unconsume ( input , self . name_buf_opt . take ( ) . unwrap ( ) ) ;
276
281
}
277
282
278
283
fn finish_named < Sink : TokenSink > ( & mut self ,
279
284
tokenizer : & mut XmlTokenizer < Sink > ,
280
- end_char : Option < char > ) -> Status {
285
+ end_char : Option < char > ,
286
+ input : & mut BufferQueue ) -> Status {
281
287
match self . name_match {
282
288
None => {
283
289
match end_char {
@@ -294,7 +300,7 @@ impl CharRefTokenizer {
294
300
295
301
_ => ( ) ,
296
302
}
297
- self . unconsume_name ( tokenizer) ;
303
+ self . unconsume_name ( tokenizer, input ) ;
298
304
self . finish_none ( )
299
305
}
300
306
@@ -343,10 +349,10 @@ impl CharRefTokenizer {
343
349
} ;
344
350
345
351
if unconsume_all {
346
- self . unconsume_name ( tokenizer) ;
352
+ self . unconsume_name ( tokenizer, input ) ;
347
353
self . finish_none ( )
348
354
} else {
349
- tokenizer. unconsume ( StrTendril :: from_slice ( & self . name_buf ( ) [ name_len..] ) ) ;
355
+ tokenizer. unconsume ( input , StrTendril :: from_slice ( & self . name_buf ( ) [ name_len..] ) ) ;
350
356
self . result = Some ( CharRef {
351
357
chars : [ from_u32 ( c1) . unwrap ( ) , from_u32 ( c2) . unwrap ( ) ] ,
352
358
num_chars : if c2 == 0 { 1 } else { 2 } ,
@@ -357,40 +363,40 @@ impl CharRefTokenizer {
357
363
}
358
364
}
359
365
360
- fn do_bogus_name < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) -> Status {
361
- let c = unwrap_or_return ! ( tokenizer. get_char( ) , Stuck ) ;
366
+ fn do_bogus_name < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) -> Status {
367
+ let c = unwrap_or_return ! ( tokenizer. get_char( input ) , Stuck ) ;
362
368
self . name_buf_mut ( ) . push_char ( c) ;
363
369
match c {
364
370
_ if is_ascii_alnum ( c) => return Progress ,
365
371
';' => self . emit_name_error ( tokenizer) ,
366
372
_ => ( )
367
373
}
368
- self . unconsume_name ( tokenizer) ;
374
+ self . unconsume_name ( tokenizer, input ) ;
369
375
self . finish_none ( )
370
376
}
371
377
372
- pub fn end_of_file < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > ) {
378
+ pub fn end_of_file < Sink : TokenSink > ( & mut self , tokenizer : & mut XmlTokenizer < Sink > , input : & mut BufferQueue ) {
373
379
while self . result . is_none ( ) {
374
380
match self . state {
375
381
Begin => drop ( self . finish_none ( ) ) ,
376
382
377
383
Numeric ( _) if !self . seen_digit
378
- => drop ( self . unconsume_numeric ( tokenizer) ) ,
384
+ => drop ( self . unconsume_numeric ( tokenizer, input ) ) ,
379
385
380
386
Numeric ( _) | NumericSemicolon => {
381
387
tokenizer. emit_error ( Borrowed ( "EOF in numeric character reference" ) ) ;
382
388
self . finish_numeric ( tokenizer) ;
383
389
}
384
390
385
- Named => drop ( self . finish_named ( tokenizer, None ) ) ,
391
+ Named => drop ( self . finish_named ( tokenizer, None , input ) ) ,
386
392
387
393
BogusName => {
388
- self . unconsume_name ( tokenizer) ;
394
+ self . unconsume_name ( tokenizer, input ) ;
389
395
self . finish_none ( ) ;
390
396
}
391
397
392
398
Octothorpe => {
393
- tokenizer. unconsume ( StrTendril :: from_slice ( "#" ) ) ;
399
+ tokenizer. unconsume ( input , StrTendril :: from_slice ( "#" ) ) ;
394
400
tokenizer. emit_error ( Borrowed ( "EOF after '#' in character reference" ) ) ;
395
401
self . finish_none ( ) ;
396
402
}
0 commit comments