@@ -5,27 +5,58 @@ use super::{is_name_continue, is_name_start, lexer_config::LexerConfig, token_da
5
5
pub struct LuaLexer < ' a > {
6
6
reader : Reader < ' a > ,
7
7
lexer_config : LexerConfig ,
8
- errors : & ' a mut Vec < LuaParseError > ,
8
+ errors : Option < & ' a mut Vec < LuaParseError > > ,
9
+ state : LuaLexerState ,
9
10
}
10
11
11
- impl LuaLexer < ' _ > {
12
- pub fn new < ' a > (
13
- text : & ' a str ,
12
/// Lexer state that is preserved between reader resets.
///
/// This is used when the lexer doesn't see the whole input source and only
/// gets a reader for each individual line, which happens when lexing code
/// blocks in comments. A token that is still open when one reader runs out
/// can then be resumed on the next reader.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LuaLexerState {
    /// Not inside any unfinished token; the next token is lexed from scratch.
    Normal,
    /// Inside a quoted string. The payload is the quote character that
    /// terminates it.
    String(char),
    /// Inside a long string. The payload is the separator value of the
    /// opening bracket (presumably the number of `=` signs).
    LongString(usize),
    /// Inside a long comment. The payload is the separator value of the
    /// opening bracket (presumably the number of `=` signs).
    LongComment(usize),
}
23
+
24
+ impl < ' a > LuaLexer < ' a > {
25
+ pub fn new (
26
+ reader : Reader < ' a > ,
27
+ lexer_config : LexerConfig ,
28
+ errors : Option < & ' a mut Vec < LuaParseError > > ,
29
+ ) -> Self {
30
+ Self :: new_with_state ( reader, LuaLexerState :: Normal , lexer_config, errors)
31
+ }
32
+
33
+ pub fn new_with_state (
34
+ reader : Reader < ' a > ,
35
+ state : LuaLexerState ,
14
36
lexer_config : LexerConfig ,
15
- errors : & ' a mut Vec < LuaParseError > ,
16
- ) -> LuaLexer < ' a > {
37
+ errors : Option < & ' a mut Vec < LuaParseError > > ,
38
+ ) -> Self {
17
39
LuaLexer {
18
- reader : Reader :: new ( text ) ,
40
+ reader,
19
41
lexer_config,
20
42
errors,
43
+ state,
21
44
}
22
45
}
23
46
24
47
pub fn tokenize ( & mut self ) -> Vec < LuaTokenData > {
25
48
let mut tokens = vec ! [ ] ;
26
49
27
50
while !self . reader . is_eof ( ) {
28
- let kind = self . lex ( ) ;
51
+ let kind = match self . state {
52
+ LuaLexerState :: Normal => self . lex ( ) ,
53
+ LuaLexerState :: String ( quote) => self . lex_string ( quote) ,
54
+ LuaLexerState :: LongString ( sep) => self . lex_long_string ( sep) ,
55
+ LuaLexerState :: LongComment ( sep) => {
56
+ self . lex_long_string ( sep) ;
57
+ LuaTokenKind :: TkLongComment
58
+ }
59
+ } ;
29
60
if kind == LuaTokenKind :: TkEof {
30
61
break ;
31
62
}
@@ -36,6 +67,16 @@ impl LuaLexer<'_> {
36
67
tokens
37
68
}
38
69
70
+ pub fn get_state ( & self ) -> LuaLexerState {
71
+ self . state
72
+ }
73
+
74
+ pub fn continue_with_new_reader ( & mut self , reader : Reader < ' a > ) -> Vec < LuaTokenData > {
75
+ assert ! ( self . reader. is_eof( ) , "previous reader wasn't exhausted" ) ;
76
+ self . reader = reader;
77
+ self . tokenize ( )
78
+ }
79
+
39
80
fn support_non_std_symbol ( & self , symbol : LuaNonStdSymbol ) -> bool {
40
81
self . lexer_config . non_std_symbols . support ( symbol)
41
82
}
@@ -105,6 +146,7 @@ impl LuaLexer<'_> {
105
146
let sep = self . skip_sep ( ) ;
106
147
if self . reader . current_char ( ) == '[' {
107
148
self . reader . bump ( ) ;
149
+ self . state = LuaLexerState :: LongComment ( sep) ;
108
150
self . lex_long_string ( sep) ;
109
151
return LuaTokenKind :: TkLongComment ;
110
152
}
@@ -120,14 +162,12 @@ impl LuaLexer<'_> {
120
162
return LuaTokenKind :: TkLeftBracket ;
121
163
}
122
164
if self . reader . current_char ( ) != '[' {
123
- self . errors . push ( LuaParseError :: syntax_error_from (
124
- & t ! ( "invalid long string delimiter" ) ,
125
- self . reader . current_range ( ) ,
126
- ) ) ;
165
+ self . error ( || t ! ( "invalid long string delimiter" ) ) ;
127
166
return LuaTokenKind :: TkLongString ;
128
167
}
129
168
130
169
self . reader . bump ( ) ;
170
+ self . state = LuaLexerState :: LongString ( sep) ;
131
171
self . lex_long_string ( sep)
132
172
}
133
173
'=' => {
@@ -147,10 +187,7 @@ impl LuaLexer<'_> {
147
187
}
148
188
'<' => {
149
189
if !self . lexer_config . support_integer_operation ( ) {
150
- self . errors . push ( LuaParseError :: syntax_error_from (
151
- & t ! ( "bitwise operation is not supported" ) ,
152
- self . reader . current_range ( ) ,
153
- ) ) ;
190
+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
154
191
}
155
192
156
193
self . reader . bump ( ) ;
@@ -174,10 +211,7 @@ impl LuaLexer<'_> {
174
211
}
175
212
'>' => {
176
213
if !self . lexer_config . support_integer_operation ( ) {
177
- self . errors . push ( LuaParseError :: syntax_error_from (
178
- & t ! ( "bitwise operation is not supported" ) ,
179
- self . reader . current_range ( ) ,
180
- ) ) ;
214
+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
181
215
}
182
216
183
217
self . reader . bump ( ) ;
@@ -196,10 +230,7 @@ impl LuaLexer<'_> {
196
230
self . reader . bump ( ) ;
197
231
if self . reader . current_char ( ) != '=' {
198
232
if !self . lexer_config . support_integer_operation ( ) {
199
- self . errors . push ( LuaParseError :: syntax_error_from (
200
- & t ! ( "bitwise operation is not supported" ) ,
201
- self . reader . current_range ( ) ,
202
- ) ) ;
233
+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
203
234
}
204
235
return LuaTokenKind :: TkBitXor ;
205
236
}
@@ -222,43 +253,8 @@ impl LuaLexer<'_> {
222
253
}
223
254
224
255
self . reader . bump ( ) ;
225
- while !self . reader . is_eof ( ) {
226
- let ch = self . reader . current_char ( ) ;
227
- if ch == quote || ch == '\n' || ch == '\r' {
228
- break ;
229
- }
230
-
231
- if ch != '\\' {
232
- self . reader . bump ( ) ;
233
- continue ;
234
- }
235
-
236
- self . reader . bump ( ) ;
237
- match self . reader . current_char ( ) {
238
- 'z' => {
239
- self . reader . bump ( ) ;
240
- self . reader
241
- . eat_while ( |c| c == ' ' || c == '\t' || c == '\r' || c == '\n' ) ;
242
- }
243
- '\r' | '\n' => {
244
- self . lex_new_line ( ) ;
245
- }
246
- _ => {
247
- self . reader . bump ( ) ;
248
- }
249
- }
250
- }
251
-
252
- if self . reader . current_char ( ) != quote {
253
- self . errors . push ( LuaParseError :: syntax_error_from (
254
- & t ! ( "unfinished string" ) ,
255
- self . reader . current_range ( ) ,
256
- ) ) ;
257
- return LuaTokenKind :: TkString ;
258
- }
259
-
260
- self . reader . bump ( ) ;
261
- LuaTokenKind :: TkString
256
+ self . state = LuaLexerState :: String ( quote) ;
257
+ self . lex_string ( quote)
262
258
}
263
259
'.' => {
264
260
if self . reader . next_char ( ) . is_ascii_digit ( ) {
@@ -295,10 +291,7 @@ impl LuaLexer<'_> {
295
291
}
296
292
}
297
293
_ if self . reader . is_eof ( ) => {
298
- self . errors . push ( LuaParseError :: syntax_error_from (
299
- & t ! ( "unfinished long comment" ) ,
300
- self . reader . current_range ( ) ,
301
- ) ) ;
294
+ self . error ( || t ! ( "unfinished long comment" ) ) ;
302
295
return LuaTokenKind :: TkLongComment ;
303
296
}
304
297
_ => {
@@ -321,10 +314,7 @@ impl LuaLexer<'_> {
321
314
}
322
315
_ => {
323
316
if !self . lexer_config . support_integer_operation ( ) {
324
- self . errors . push ( LuaParseError :: syntax_error_from (
325
- & t ! ( "integer division is not supported" ) ,
326
- self . reader . current_range ( ) ,
327
- ) ) ;
317
+ self . error ( || t ! ( "integer division is not supported" ) ) ;
328
318
}
329
319
330
320
self . reader . bump ( ) ;
@@ -403,10 +393,7 @@ impl LuaLexer<'_> {
403
393
}
404
394
'&' => {
405
395
if !self . lexer_config . support_integer_operation ( ) {
406
- self . errors . push ( LuaParseError :: syntax_error_from (
407
- & t ! ( "bitwise operation is not supported" ) ,
408
- self . reader . current_range ( ) ,
409
- ) ) ;
396
+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
410
397
}
411
398
412
399
self . reader . bump ( ) ;
@@ -426,10 +413,7 @@ impl LuaLexer<'_> {
426
413
}
427
414
'|' => {
428
415
if !self . lexer_config . support_integer_operation ( ) {
429
- self . errors . push ( LuaParseError :: syntax_error_from (
430
- & t ! ( "bitwise operation is not supported" ) ,
431
- self . reader . current_range ( ) ,
432
- ) ) ;
416
+ self . error ( || t ! ( "bitwise operation is not supported" ) ) ;
433
417
}
434
418
435
419
self . reader . bump ( ) ;
@@ -524,6 +508,47 @@ impl LuaLexer<'_> {
524
508
self . reader . eat_when ( '=' )
525
509
}
526
510
511
+ fn lex_string ( & mut self , quote : char ) -> LuaTokenKind {
512
+ while !self . reader . is_eof ( ) {
513
+ let ch = self . reader . current_char ( ) ;
514
+ if ch == quote || ch == '\n' || ch == '\r' {
515
+ break ;
516
+ }
517
+
518
+ if ch != '\\' {
519
+ self . reader . bump ( ) ;
520
+ continue ;
521
+ }
522
+
523
+ self . reader . bump ( ) ;
524
+ match self . reader . current_char ( ) {
525
+ 'z' => {
526
+ self . reader . bump ( ) ;
527
+ self . reader
528
+ . eat_while ( |c| c == ' ' || c == '\t' || c == '\r' || c == '\n' ) ;
529
+ }
530
+ '\r' | '\n' => {
531
+ self . lex_new_line ( ) ;
532
+ }
533
+ _ => {
534
+ self . reader . bump ( ) ;
535
+ }
536
+ }
537
+ }
538
+
539
+ if self . reader . current_char ( ) == quote || !self . reader . is_eof ( ) {
540
+ self . state = LuaLexerState :: Normal ;
541
+ }
542
+
543
+ if self . reader . current_char ( ) != quote {
544
+ self . error ( || t ! ( "unfinished string" ) ) ;
545
+ return LuaTokenKind :: TkString ;
546
+ }
547
+
548
+ self . reader . bump ( ) ;
549
+ LuaTokenKind :: TkString
550
+ }
551
+
527
552
fn lex_long_string ( & mut self , sep : usize ) -> LuaTokenKind {
528
553
let mut end = false ;
529
554
while !self . reader . is_eof ( ) {
@@ -543,11 +568,12 @@ impl LuaLexer<'_> {
543
568
}
544
569
}
545
570
571
+ if end || !self . reader . is_eof ( ) {
572
+ self . state = LuaLexerState :: Normal ;
573
+ }
574
+
546
575
if !end {
547
- self . errors . push ( LuaParseError :: syntax_error_from (
548
- & t ! ( "unfinished long string or comment" ) ,
549
- self . reader . current_range ( ) ,
550
- ) ) ;
576
+ self . error ( || t ! ( "unfinished long string or comment" ) ) ;
551
577
}
552
578
553
579
LuaTokenKind :: TkLongString
@@ -666,18 +692,26 @@ impl LuaLexer<'_> {
666
692
}
667
693
668
694
if self . reader . current_char ( ) . is_alphabetic ( ) {
669
- self . errors . push ( LuaParseError :: syntax_error_from (
670
- & format ! (
671
- "unexpected character '{}' after number literal" ,
672
- self . reader. current_char( )
673
- ) ,
674
- self . reader . current_range ( ) ,
675
- ) ) ;
695
+ let ch = self . reader . current_char ( ) ;
696
+ self . error ( || format ! ( "unexpected character '{ch}' after number literal" ) ) ;
676
697
}
677
698
678
699
match state {
679
700
NumberState :: Int | NumberState :: Hex => LuaTokenKind :: TkInt ,
680
701
_ => LuaTokenKind :: TkFloat ,
681
702
}
682
703
}
704
+
705
+ fn error < F , R > ( & mut self , msg : F )
706
+ where
707
+ F : FnOnce ( ) -> R ,
708
+ R : AsRef < str > ,
709
+ {
710
+ if let Some ( errors) = & mut self . errors {
711
+ errors. push ( LuaParseError :: syntax_error_from (
712
+ msg ( ) . as_ref ( ) ,
713
+ self . reader . current_range ( ) ,
714
+ ) )
715
+ }
716
+ }
683
717
}
0 commit comments