7
7
// option. This file may not be copied, modified, or distributed
8
8
// except according to those terms.
9
9
10
- use super :: codegen:: { resolve_unique_hash_value, Node , DAFSA_NODES } ;
10
+ mod codegen;
11
+
12
+ use codegen:: { resolve_unique_hash_value, Node , DAFSA_NODES } ;
11
13
use super :: { CharRef , Status } ;
12
- use crate :: tokenizer:: TokenSink ;
13
- use crate :: tokenizer:: Tokenizer ;
14
- use markup5ever:: buffer_queue:: BufferQueue ;
15
- use markup5ever:: tendril:: StrTendril ;
14
+ use crate :: buffer_queue:: BufferQueue ;
15
+ use crate :: tendril:: StrTendril ;
16
16
17
17
use std:: borrow:: Cow ;
18
18
use std:: mem;
19
19
20
+ type EmitErrorFn = Fn ( & str ) ;
21
+
20
22
#[ derive( Clone , Debug ) ]
21
23
pub ( crate ) struct Match {
22
24
hash_value : usize ,
23
25
matched_text : StrTendril ,
24
26
}
25
27
28
+ impl CharRef {
29
+ pub const EMPTY : CharRef = CharRef {
30
+ chars : [ '\0' , '\0' ] ,
31
+ num_chars : 0 ,
32
+ } ;
33
+ }
34
+
26
35
#[ derive( Clone , Debug ) ]
27
36
pub ( crate ) struct NamedReferenceTokenizerState {
28
37
current_node : & ' static Node ,
@@ -62,15 +71,15 @@ impl NamedReferenceTokenizerState {
62
71
}
63
72
}
64
73
65
- fn feed_character < Sink : TokenSink > (
74
+ fn feed_character (
66
75
& mut self ,
67
76
c : char ,
68
- tokenizer : & Tokenizer < Sink > ,
77
+ error_callback : EmitErrorFn ,
69
78
input : & BufferQueue ,
70
79
) -> NamedReferenceTokenizationResult {
71
80
self . name_buffer . push_char ( c) ;
72
81
if !c. is_ascii_alphanumeric ( ) && c != ';' {
73
- return self . did_find_invalid_character ( tokenizer , input) ;
82
+ return self . did_find_invalid_character ( error_callback , input) ;
74
83
}
75
84
76
85
let code_point = c as u32 as u8 ;
@@ -85,7 +94,7 @@ impl NamedReferenceTokenizerState {
85
94
}
86
95
87
96
let Some ( next_node) = next_node else {
88
- return self . did_find_invalid_character ( tokenizer , input) ;
97
+ return self . did_find_invalid_character ( error_callback , input) ;
89
98
} ;
90
99
91
100
self . current_node = next_node;
@@ -101,24 +110,24 @@ impl NamedReferenceTokenizerState {
101
110
NamedReferenceTokenizationResult :: Continue
102
111
}
103
112
104
- fn did_find_invalid_character < Sink : TokenSink > (
113
+ fn did_find_invalid_character (
105
114
& mut self ,
106
- tokenizer : & Tokenizer < Sink > ,
115
+ error_callback : EmitErrorFn ,
107
116
input : & BufferQueue ,
108
117
) -> NamedReferenceTokenizationResult {
109
118
if let Some ( last_match) = self . last_match . take ( ) {
110
119
input. push_front ( self . name_buffer . clone ( ) ) ;
111
120
return NamedReferenceTokenizationResult :: Success {
112
- reference : self . finish_matching_reference ( last_match, tokenizer , input) ,
121
+ reference : self . finish_matching_reference ( last_match, error_callback , input) ,
113
122
} ;
114
123
}
115
124
116
125
NamedReferenceTokenizationResult :: Failed
117
126
}
118
127
119
- pub ( crate ) fn step < Sink : TokenSink > (
128
+ pub ( crate ) fn step (
120
129
& mut self ,
121
- tokenizer : & Tokenizer < Sink > ,
130
+ error_callback : EmitErrorFn ,
122
131
input : & BufferQueue ,
123
132
) -> Result < Status , StrTendril > {
124
133
loop {
@@ -139,18 +148,18 @@ impl NamedReferenceTokenizerState {
139
148
}
140
149
}
141
150
142
- pub ( crate ) fn notify_end_of_file < Sink : TokenSink > (
151
+ pub ( crate ) fn notify_end_of_file (
143
152
& mut self ,
144
- tokenizer : & Tokenizer < Sink > ,
153
+ error_callback : EmitErrorFn ,
145
154
input : & BufferQueue ,
146
155
) -> Option < CharRef > {
147
156
input. push_front ( self . name_buffer . clone ( ) ) ;
148
157
if let Some ( last_match) = self . last_match . take ( ) {
149
- Some ( self . finish_matching_reference ( last_match, tokenizer , input) )
158
+ Some ( self . finish_matching_reference ( last_match, error_callback , input) )
150
159
} else {
151
160
if self . name_buffer . ends_with ( ';' ) {
152
161
println ! ( "end of file and last is semicolon" ) ;
153
- emit_name_error ( mem:: take ( & mut self . name_buffer ) , tokenizer ) ;
162
+ emit_name_error ( mem:: take ( & mut self . name_buffer ) , error_callback ) ;
154
163
}
155
164
None
156
165
}
@@ -159,10 +168,10 @@ impl NamedReferenceTokenizerState {
159
168
/// Called whenever the tokenizer has finished matching a named reference.
160
169
///
161
170
/// This method takes care of emitting appropriate errors and implement some legacy quirks.
162
- pub ( crate ) fn finish_matching_reference < Sink : TokenSink > (
171
+ pub ( crate ) fn finish_matching_reference (
163
172
& self ,
164
173
matched : Match ,
165
- tokenizer : & Tokenizer < Sink > ,
174
+ error_callback : EmitErrorFn ,
166
175
input : & BufferQueue ,
167
176
) -> CharRef {
168
177
let char_ref = resolve_unique_hash_value ( matched. hash_value ) ;
@@ -190,19 +199,17 @@ impl NamedReferenceTokenizerState {
190
199
// (;), then this is a missing-semicolon-after-character-reference parse
191
200
// error.
192
201
if last_matched_codepoint != ';' {
193
- tokenizer. emit_error ( Cow :: Borrowed (
194
- "Character reference does not end with semicolon" ,
195
- ) ) ;
202
+ error_callback ( "Character reference does not end with semicolon" ) ;
196
203
}
197
204
char_ref
198
205
}
199
206
}
200
207
201
- pub ( crate ) fn emit_name_error < Sink : TokenSink > ( name : StrTendril , tokenizer : & Tokenizer < Sink > ) {
202
- let msg = if tokenizer. opts . exact_errors {
203
- Cow :: from ( format ! ( "Invalid character reference &{}" , name) )
204
- } else {
205
- Cow :: from ( "Invalid character reference" )
206
- } ;
207
- tokenizer. emit_error ( msg) ;
208
- }
208
+ // pub(crate) fn emit_name_error(name: StrTendril, tokenizer: &Tokenizer<Sink>) {
209
+ // let msg = if tokenizer.opts.exact_errors {
210
+ // Cow::from(format!("Invalid character reference &{}", name))
211
+ // } else {
212
+ // Cow::from("Invalid character reference")
213
+ // };
214
+ // tokenizer.emit_error(msg);
215
+ // }
0 commit comments