@@ -28,7 +28,7 @@ pub(super) struct CharRef {
28
28
// Result of driving the character reference tokenizer.
// With this change the finished `CharRef` travels inside `Done` instead of
// being stored on the tokenizer and fetched separately.
pub ( super ) enum Status {
29
29
// Returned elsewhere in this file from a `None` match arm; presumably means
// more input is needed before a decision can be made (confirm against caller).
Stuck ,
30
30
// Returned after a state transition (e.g. after switching to `State::Octothorpe`).
Progress ,
31
- Done ,
31
// Tokenization finished; the payload is the resulting character reference.
+ Done ( CharRef ) ,
32
32
}
33
33
34
34
#[ derive( Debug ) ]
@@ -43,7 +43,6 @@ enum State {
43
43
44
44
pub ( super ) struct CharRefTokenizer {
45
45
state : State ,
46
- result : Option < CharRef > ,
47
46
is_consumed_in_attribute : bool ,
48
47
49
48
num : u32 ,
@@ -56,12 +55,18 @@ pub(super) struct CharRefTokenizer {
56
55
name_len : usize ,
57
56
}
58
57
58
// Associated constant for the "no character reference" result:
// both character slots are '\0' and `num_chars` is 0.
// It is used in `Status::Done(CharRef::EMPTY)` wherever the removed
// `finish_none` helper was previously called.
+ impl CharRef {
59
+ const EMPTY : CharRef = CharRef {
60
+ chars : [ '\0' , '\0' ] ,
61
+ num_chars : 0 ,
62
+ } ;
63
+ }
64
+
59
65
impl CharRefTokenizer {
60
66
pub ( super ) fn new ( is_consumed_in_attribute : bool ) -> CharRefTokenizer {
61
67
CharRefTokenizer {
62
68
is_consumed_in_attribute,
63
69
state : State :: Begin ,
64
- result : None ,
65
70
num : 0 ,
66
71
num_too_big : false ,
67
72
seen_digit : false ,
@@ -72,12 +77,6 @@ impl CharRefTokenizer {
72
77
}
73
78
}
74
79
75
// Removed by this change: the tokenizer no longer has a `result` field to read.
// The finished `CharRef` is now carried by `Status::Done(..)` and returned
// directly from `end_of_file`.
- // A CharRefTokenizer can only tokenize one character reference,
76
- // so this method consumes the tokenizer.
77
- pub ( super ) fn get_result ( self ) -> CharRef {
78
// Panicked with this message if no result had been stored yet.
- self . result . expect ( "get_result called before done" )
79
- }
80
-
81
80
fn name_buf ( & self ) -> & StrTendril {
82
81
self . name_buf_opt
83
82
. as_ref ( )
@@ -90,20 +89,11 @@ impl CharRefTokenizer {
90
89
. expect ( "name_buf missing in named character reference" )
91
90
}
92
91
93
// Removed by this change: this helper stored an empty `CharRef`
// (two '\0' slots, `num_chars` 0) in `self.result` and returned `Status::Done`.
// Its call sites now return `Status::Done(CharRef::EMPTY)` instead.
- fn finish_none ( & mut self ) -> Status {
94
- self . result = Some ( CharRef {
95
- chars : [ '\0' , '\0' ] ,
96
- num_chars : 0 ,
97
- } ) ;
98
- Status :: Done
99
- }
100
-
101
92
// Finishes tokenization with a single-character result.
// `c` goes in the first slot, the second slot is '\0', and `num_chars` is 1.
// Old version: stored the result in `self.result` and returned a bare `Status::Done`.
// New version: returns the `CharRef` inside `Status::Done(..)`.
fn finish_one ( & mut self , c : char ) -> Status {
102
- self . result = Some ( CharRef {
93
+ Status :: Done ( CharRef {
103
94
chars : [ c, '\0' ] ,
104
95
num_chars : 1 ,
105
- } ) ;
106
- Status :: Done
96
+ } )
107
97
}
108
98
}
109
99
@@ -113,10 +103,6 @@ impl CharRefTokenizer {
113
103
tokenizer : & Tokenizer < Sink > ,
114
104
input : & BufferQueue ,
115
105
) -> Status {
116
- if self . result . is_some ( ) {
117
- return Status :: Done ;
118
- }
119
-
120
106
debug ! ( "char ref tokenizer stepping in state {:?}" , self . state) ;
121
107
match self . state {
122
108
State :: Begin => self . do_begin ( tokenizer, input) ,
@@ -144,7 +130,7 @@ impl CharRefTokenizer {
144
130
self . state = State :: Octothorpe ;
145
131
Status :: Progress
146
132
} ,
147
- Some ( _) => self . finish_none ( ) ,
133
+ Some ( _) => Status :: Done ( CharRef :: EMPTY ) ,
148
134
None => Status :: Stuck ,
149
135
}
150
136
}
@@ -228,7 +214,7 @@ impl CharRefTokenizer {
228
214
229
215
input. push_front ( unconsume) ;
230
216
tokenizer. emit_error ( Borrowed ( "Numeric character reference without digits" ) ) ;
231
- self . finish_none ( )
217
+ Status :: Done ( CharRef :: EMPTY )
232
218
}
233
219
234
220
fn finish_numeric < Sink : TokenSink > ( & mut self , tokenizer : & Tokenizer < Sink > ) -> Status {
@@ -331,7 +317,7 @@ impl CharRefTokenizer {
331
317
_ => ( ) ,
332
318
}
333
319
self . unconsume_name ( input) ;
334
- self . finish_none ( )
320
+ Status :: Done ( CharRef :: EMPTY )
335
321
} ,
336
322
337
323
Some ( ( c1, c2) ) => {
@@ -379,15 +365,14 @@ impl CharRefTokenizer {
379
365
380
366
if unconsume_all {
381
367
self . unconsume_name ( input) ;
382
- self . finish_none ( )
368
+ Status :: Done ( CharRef :: EMPTY )
383
369
} else {
384
370
input. push_front ( StrTendril :: from_slice ( & self . name_buf ( ) [ name_len..] ) ) ;
385
371
tokenizer. ignore_lf . set ( false ) ;
386
- self . result = Some ( CharRef {
372
+ Status :: Done ( CharRef {
387
373
chars : [ from_u32 ( c1) . unwrap ( ) , from_u32 ( c2) . unwrap ( ) ] ,
388
374
num_chars : if c2 == 0 { 1 } else { 2 } ,
389
- } ) ;
390
- Status :: Done
375
+ } )
391
376
}
392
377
} ,
393
378
}
@@ -411,34 +396,42 @@ impl CharRefTokenizer {
411
396
_ => ( ) ,
412
397
}
413
398
self . unconsume_name ( input) ;
414
- self . finish_none ( )
399
+ Status :: Done ( CharRef :: EMPTY )
415
400
}
416
401
417
402
// Resolves the character reference when the input has ended.
// Each tokenizer state is mapped to a `Status`; the loop repeats until a result is available.
// Old version: returned nothing and looped while `self.result` was unset.
// New version: returns the `CharRef` directly.
pub ( super ) fn end_of_file < Sink : TokenSink > (
418
403
& mut self ,
419
404
tokenizer : & Tokenizer < Sink > ,
420
405
input : & BufferQueue ,
421
- ) {
422
- while self . result . is_none ( ) {
423
- match self . state {
424
- State :: Begin => drop ( self . finish_none ( ) ) ,
425
- State :: Numeric ( _) if !self . seen_digit => {
426
- self . unconsume_numeric ( tokenizer, input) ;
427
- } ,
406
+ ) -> CharRef {
407
+ loop {
408
// Compute the status for the current state; the arms are now expressions
// whose value is inspected below instead of being dropped.
+ let status = match self . state {
409
// Nothing was consumed yet: the result is the empty reference.
+ State :: Begin => Status :: Done ( CharRef :: EMPTY ) ,
410
// Numeric reference with no digits seen: delegate to `unconsume_numeric`.
+ State :: Numeric ( _) if !self . seen_digit => self . unconsume_numeric ( tokenizer, input) ,
428
411
// Digits were seen (or only the semicolon is pending): report the error, then finish.
State :: Numeric ( _) | State :: NumericSemicolon => {
429
412
tokenizer. emit_error ( Borrowed ( "EOF in numeric character reference" ) ) ;
430
- self . finish_numeric ( tokenizer) ;
413
+ self . finish_numeric ( tokenizer)
431
414
} ,
432
- State :: Named => drop ( self . finish_named ( tokenizer, input, None ) ) ,
415
// `None` is passed as the last argument of `finish_named`
// (presumably the terminating character, absent at EOF - confirm against `finish_named`).
+ State :: Named => self . finish_named ( tokenizer, input, None ) ,
433
416
// Bogus name: unconsume the name and produce no reference.
State :: BogusName => {
434
417
self . unconsume_name ( input) ;
435
- self . finish_none ( ) ;
418
+ Status :: Done ( CharRef :: EMPTY )
436
419
} ,
437
420
// Only '#' was consumed: push it back onto the input, report the error, produce no reference.
State :: Octothorpe => {
438
421
input. push_front ( StrTendril :: from_slice ( "#" ) ) ;
439
422
tokenizer. emit_error ( Borrowed ( "EOF after '#' in character reference" ) ) ;
440
- self . finish_none ( ) ;
423
+ Status :: Done ( CharRef :: EMPTY )
424
+ } ,
425
+ } ;
426
+
427
// `Done` returns its payload, `Stuck` returns the empty reference,
// and `Progress` goes around the loop again.
+ match status {
428
+ Status :: Done ( char_ref) => {
429
+ return char_ref;
430
+ } ,
431
+ Status :: Stuck => {
432
+ return CharRef :: EMPTY ;
441
433
} ,
434
+ Status :: Progress => { } ,
442
435
}
443
436
}
444
437
}
0 commit comments