@@ -5,27 +5,58 @@ use super::{is_name_continue, is_name_start, lexer_config::LexerConfig, token_da
55pub struct LuaLexer < ' a > {
66 reader : Reader < ' a > ,
77 lexer_config : LexerConfig ,
8- errors : & ' a mut Vec < LuaParseError > ,
8+ errors : Option < & ' a mut Vec < LuaParseError > > ,
9+ state : LuaLexerState ,
910}
1011
11- impl LuaLexer < ' _ > {
12- pub fn new < ' a > (
13- text : & ' a str ,
/// Lexer state that survives a reader reset.
///
/// It is needed when the lexer never sees the whole input at once and is
/// instead handed a separate reader for each individual line, which happens
/// when lexing code blocks embedded in comments.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LuaLexerState {
    /// Ordinary tokenization; no token is waiting to be continued.
    Normal,
    /// Inside a quoted string; the payload is the quote character that
    /// terminates it.
    String(char),
    /// Inside a long string; the payload is the separator value measured
    /// when the opening bracket was lexed (presumably the number of `=`
    /// signs -- confirm against `skip_sep`).
    LongString(usize),
    /// Inside a long comment; the payload has the same meaning as for
    /// [`LuaLexerState::LongString`].
    LongComment(usize),
}
23+
24+ impl < ' a > LuaLexer < ' a > {
25+ pub fn new (
26+ reader : Reader < ' a > ,
27+ lexer_config : LexerConfig ,
28+ errors : Option < & ' a mut Vec < LuaParseError > > ,
29+ ) -> Self {
30+ Self :: new_with_state ( reader, LuaLexerState :: Normal , lexer_config, errors)
31+ }
32+
33+ pub fn new_with_state (
34+ reader : Reader < ' a > ,
35+ state : LuaLexerState ,
1436 lexer_config : LexerConfig ,
15- errors : & ' a mut Vec < LuaParseError > ,
16- ) -> LuaLexer < ' a > {
37+ errors : Option < & ' a mut Vec < LuaParseError > > ,
38+ ) -> Self {
1739 LuaLexer {
18- reader : Reader :: new ( text ) ,
40+ reader,
1941 lexer_config,
2042 errors,
43+ state,
2144 }
2245 }
2346
2447 pub fn tokenize ( & mut self ) -> Vec < LuaTokenData > {
2548 let mut tokens = vec ! [ ] ;
2649
2750 while !self . reader . is_eof ( ) {
28- let kind = self . lex ( ) ;
51+ let kind = match self . state {
52+ LuaLexerState :: Normal => self . lex ( ) ,
53+ LuaLexerState :: String ( quote) => self . lex_string ( quote) ,
54+ LuaLexerState :: LongString ( sep) => self . lex_long_string ( sep) ,
55+ LuaLexerState :: LongComment ( sep) => {
56+ self . lex_long_string ( sep) ;
57+ LuaTokenKind :: TkLongComment
58+ }
59+ } ;
2960 if kind == LuaTokenKind :: TkEof {
3061 break ;
3162 }
@@ -36,6 +67,16 @@ impl LuaLexer<'_> {
3667 tokens
3768 }
3869
70+ pub fn get_state ( & self ) -> LuaLexerState {
71+ self . state
72+ }
73+
74+ pub fn continue_with_new_reader ( & mut self , reader : Reader < ' a > ) -> Vec < LuaTokenData > {
75+ assert ! ( self . reader. is_eof( ) , "previous reader wasn't exhausted" ) ;
76+ self . reader = reader;
77+ self . tokenize ( )
78+ }
79+
3980 fn support_non_std_symbol ( & self , symbol : LuaNonStdSymbol ) -> bool {
4081 self . lexer_config . non_std_symbols . support ( symbol)
4182 }
@@ -105,6 +146,7 @@ impl LuaLexer<'_> {
105146 let sep = self . skip_sep ( ) ;
106147 if self . reader . current_char ( ) == '[' {
107148 self . reader . bump ( ) ;
149+ self . state = LuaLexerState :: LongComment ( sep) ;
108150 self . lex_long_string ( sep) ;
109151 return LuaTokenKind :: TkLongComment ;
110152 }
@@ -120,14 +162,12 @@ impl LuaLexer<'_> {
120162 return LuaTokenKind :: TkLeftBracket ;
121163 }
122164 if self . reader . current_char ( ) != '[' {
123- self . errors . push ( LuaParseError :: syntax_error_from (
124- & t ! ( "invalid long string delimiter" ) ,
125- self . reader . current_range ( ) ,
126- ) ) ;
165+ self . error ( || t ! ( "invalid long string delimiter" ) ) ;
127166 return LuaTokenKind :: TkLongString ;
128167 }
129168
130169 self . reader . bump ( ) ;
170+ self . state = LuaLexerState :: LongString ( sep) ;
131171 self . lex_long_string ( sep)
132172 }
133173 '=' => {
@@ -147,10 +187,7 @@ impl LuaLexer<'_> {
147187 }
148188 '<' => {
149189 if !self . lexer_config . support_integer_operation ( ) {
150- self . errors . push ( LuaParseError :: syntax_error_from (
151- & t ! ( "bitwise operation is not supported" ) ,
152- self . reader . current_range ( ) ,
153- ) ) ;
190+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
154191 }
155192
156193 self . reader . bump ( ) ;
@@ -174,10 +211,7 @@ impl LuaLexer<'_> {
174211 }
175212 '>' => {
176213 if !self . lexer_config . support_integer_operation ( ) {
177- self . errors . push ( LuaParseError :: syntax_error_from (
178- & t ! ( "bitwise operation is not supported" ) ,
179- self . reader . current_range ( ) ,
180- ) ) ;
214+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
181215 }
182216
183217 self . reader . bump ( ) ;
@@ -196,10 +230,7 @@ impl LuaLexer<'_> {
196230 self . reader . bump ( ) ;
197231 if self . reader . current_char ( ) != '=' {
198232 if !self . lexer_config . support_integer_operation ( ) {
199- self . errors . push ( LuaParseError :: syntax_error_from (
200- & t ! ( "bitwise operation is not supported" ) ,
201- self . reader . current_range ( ) ,
202- ) ) ;
233+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
203234 }
204235 return LuaTokenKind :: TkBitXor ;
205236 }
@@ -222,43 +253,8 @@ impl LuaLexer<'_> {
222253 }
223254
224255 self . reader . bump ( ) ;
225- while !self . reader . is_eof ( ) {
226- let ch = self . reader . current_char ( ) ;
227- if ch == quote || ch == '\n' || ch == '\r' {
228- break ;
229- }
230-
231- if ch != '\\' {
232- self . reader . bump ( ) ;
233- continue ;
234- }
235-
236- self . reader . bump ( ) ;
237- match self . reader . current_char ( ) {
238- 'z' => {
239- self . reader . bump ( ) ;
240- self . reader
241- . eat_while ( |c| c == ' ' || c == '\t' || c == '\r' || c == '\n' ) ;
242- }
243- '\r' | '\n' => {
244- self . lex_new_line ( ) ;
245- }
246- _ => {
247- self . reader . bump ( ) ;
248- }
249- }
250- }
251-
252- if self . reader . current_char ( ) != quote {
253- self . errors . push ( LuaParseError :: syntax_error_from (
254- & t ! ( "unfinished string" ) ,
255- self . reader . current_range ( ) ,
256- ) ) ;
257- return LuaTokenKind :: TkString ;
258- }
259-
260- self . reader . bump ( ) ;
261- LuaTokenKind :: TkString
256+ self . state = LuaLexerState :: String ( quote) ;
257+ self . lex_string ( quote)
262258 }
263259 '.' => {
264260 if self . reader . next_char ( ) . is_ascii_digit ( ) {
@@ -295,10 +291,7 @@ impl LuaLexer<'_> {
295291 }
296292 }
297293 _ if self . reader . is_eof ( ) => {
298- self . errors . push ( LuaParseError :: syntax_error_from (
299- & t ! ( "unfinished long comment" ) ,
300- self . reader . current_range ( ) ,
301- ) ) ;
294+ self . error ( || t ! ( "unfinished long comment" ) ) ;
302295 return LuaTokenKind :: TkLongComment ;
303296 }
304297 _ => {
@@ -321,10 +314,7 @@ impl LuaLexer<'_> {
321314 }
322315 _ => {
323316 if !self . lexer_config . support_integer_operation ( ) {
324- self . errors . push ( LuaParseError :: syntax_error_from (
325- & t ! ( "integer division is not supported" ) ,
326- self . reader . current_range ( ) ,
327- ) ) ;
317+ self . error ( || t ! ( "integer division is not supported" ) ) ;
328318 }
329319
330320 self . reader . bump ( ) ;
@@ -403,10 +393,7 @@ impl LuaLexer<'_> {
403393 }
404394 '&' => {
405395 if !self . lexer_config . support_integer_operation ( ) {
406- self . errors . push ( LuaParseError :: syntax_error_from (
407- & t ! ( "bitwise operation is not supported" ) ,
408- self . reader . current_range ( ) ,
409- ) ) ;
396+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
410397 }
411398
412399 self . reader . bump ( ) ;
@@ -426,10 +413,7 @@ impl LuaLexer<'_> {
426413 }
427414 '|' => {
428415 if !self . lexer_config . support_integer_operation ( ) {
429- self . errors . push ( LuaParseError :: syntax_error_from (
430- & t ! ( "bitwise operation is not supported" ) ,
431- self . reader . current_range ( ) ,
432- ) ) ;
416+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
433417 }
434418
435419 self . reader . bump ( ) ;
@@ -524,6 +508,47 @@ impl LuaLexer<'_> {
524508 self . reader . eat_when ( '=' )
525509 }
526510
511+ fn lex_string ( & mut self , quote : char ) -> LuaTokenKind {
512+ while !self . reader . is_eof ( ) {
513+ let ch = self . reader . current_char ( ) ;
514+ if ch == quote || ch == '\n' || ch == '\r' {
515+ break ;
516+ }
517+
518+ if ch != '\\' {
519+ self . reader . bump ( ) ;
520+ continue ;
521+ }
522+
523+ self . reader . bump ( ) ;
524+ match self . reader . current_char ( ) {
525+ 'z' => {
526+ self . reader . bump ( ) ;
527+ self . reader
528+ . eat_while ( |c| c == ' ' || c == '\t' || c == '\r' || c == '\n' ) ;
529+ }
530+ '\r' | '\n' => {
531+ self . lex_new_line ( ) ;
532+ }
533+ _ => {
534+ self . reader . bump ( ) ;
535+ }
536+ }
537+ }
538+
539+ if self . reader . current_char ( ) == quote || !self . reader . is_eof ( ) {
540+ self . state = LuaLexerState :: Normal ;
541+ }
542+
543+ if self . reader . current_char ( ) != quote {
544+ self . error ( || t ! ( "unfinished string" ) ) ;
545+ return LuaTokenKind :: TkString ;
546+ }
547+
548+ self . reader . bump ( ) ;
549+ LuaTokenKind :: TkString
550+ }
551+
527552 fn lex_long_string ( & mut self , sep : usize ) -> LuaTokenKind {
528553 let mut end = false ;
529554 while !self . reader . is_eof ( ) {
@@ -543,11 +568,12 @@ impl LuaLexer<'_> {
543568 }
544569 }
545570
571+ if end || !self . reader . is_eof ( ) {
572+ self . state = LuaLexerState :: Normal ;
573+ }
574+
546575 if !end {
547- self . errors . push ( LuaParseError :: syntax_error_from (
548- & t ! ( "unfinished long string or comment" ) ,
549- self . reader . current_range ( ) ,
550- ) ) ;
576+ self . error ( || t ! ( "unfinished long string or comment" ) ) ;
551577 }
552578
553579 LuaTokenKind :: TkLongString
@@ -666,18 +692,26 @@ impl LuaLexer<'_> {
666692 }
667693
668694 if self . reader . current_char ( ) . is_alphabetic ( ) {
669- self . errors . push ( LuaParseError :: syntax_error_from (
670- & format ! (
671- "unexpected character '{}' after number literal" ,
672- self . reader. current_char( )
673- ) ,
674- self . reader . current_range ( ) ,
675- ) ) ;
695+ let ch = self . reader . current_char ( ) ;
696+ self . error ( || format ! ( "unexpected character '{ch}' after number literal" ) ) ;
676697 }
677698
678699 match state {
679700 NumberState :: Int | NumberState :: Hex => LuaTokenKind :: TkInt ,
680701 _ => LuaTokenKind :: TkFloat ,
681702 }
682703 }
704+
705+ fn error < F , R > ( & mut self , msg : F )
706+ where
707+ F : FnOnce ( ) -> R ,
708+ R : AsRef < str > ,
709+ {
710+ if let Some ( errors) = & mut self . errors {
711+ errors. push ( LuaParseError :: syntax_error_from (
712+ msg ( ) . as_ref ( ) ,
713+ self . reader . current_range ( ) ,
714+ ) )
715+ }
716+ }
683717}
0 commit comments