5
5
from jmespath .exceptions import LexerError , EmptyExpressionError
6
6
7
7
8
- VALID_NUMBER = set (string .digits )
8
+ START_IDENTIFIER = set (string .ascii_letters + '_' )
9
9
VALID_IDENTIFIER = set (string .ascii_letters + string .digits + '_' )
10
- STATE_IDENTIFIER = 0 ;
11
- STATE_NUMBER = 1 ;
12
- STATE_SINGLE_CHAR = 2 ;
13
- STATE_WHITESPACE = 3 ;
14
- STATE_STRING_LITERAL = 4 ;
15
- STATE_QUOTED_STRING = 5 ;
16
- STATE_JSON_LITERAL = 6 ;
17
- STATE_LBRACKET = 7 ;
18
- STATE_PIPE = 8 ;
19
- STATE_LT = 9 ;
20
- STATE_GT = 10 ;
21
- STATE_EQ = 11 ;
22
- STATE_NOT = 12 ;
23
- TRANSITION_TABLE = {
24
- '<' : STATE_LT ,
25
- '>' : STATE_GT ,
26
- '=' : STATE_EQ ,
27
- '!' : STATE_NOT ,
28
- '[' : STATE_LBRACKET ,
29
- '|' : STATE_PIPE ,
30
- '`' : STATE_JSON_LITERAL ,
31
- '"' : STATE_QUOTED_STRING ,
32
- "'" : STATE_STRING_LITERAL ,
33
- '-' : STATE_NUMBER ,
34
- '0' : STATE_NUMBER ,
35
- '1' : STATE_NUMBER ,
36
- '2' : STATE_NUMBER ,
37
- '3' : STATE_NUMBER ,
38
- '4' : STATE_NUMBER ,
39
- '5' : STATE_NUMBER ,
40
- '6' : STATE_NUMBER ,
41
- '7' : STATE_NUMBER ,
42
- '8' : STATE_NUMBER ,
43
- '9' : STATE_NUMBER ,
44
- '.' : STATE_SINGLE_CHAR ,
45
- '*' : STATE_SINGLE_CHAR ,
46
- ']' : STATE_SINGLE_CHAR ,
47
- ',' : STATE_SINGLE_CHAR ,
48
- ':' : STATE_SINGLE_CHAR ,
49
- '@' : STATE_SINGLE_CHAR ,
50
- '&' : STATE_SINGLE_CHAR ,
51
- '(' : STATE_SINGLE_CHAR ,
52
- ')' : STATE_SINGLE_CHAR ,
53
- '{' : STATE_SINGLE_CHAR ,
54
- '}' : STATE_SINGLE_CHAR ,
55
- '_' : STATE_IDENTIFIER ,
56
- 'A' : STATE_IDENTIFIER ,
57
- 'B' : STATE_IDENTIFIER ,
58
- 'C' : STATE_IDENTIFIER ,
59
- 'D' : STATE_IDENTIFIER ,
60
- 'E' : STATE_IDENTIFIER ,
61
- 'F' : STATE_IDENTIFIER ,
62
- 'G' : STATE_IDENTIFIER ,
63
- 'H' : STATE_IDENTIFIER ,
64
- 'I' : STATE_IDENTIFIER ,
65
- 'J' : STATE_IDENTIFIER ,
66
- 'K' : STATE_IDENTIFIER ,
67
- 'L' : STATE_IDENTIFIER ,
68
- 'M' : STATE_IDENTIFIER ,
69
- 'N' : STATE_IDENTIFIER ,
70
- 'O' : STATE_IDENTIFIER ,
71
- 'P' : STATE_IDENTIFIER ,
72
- 'Q' : STATE_IDENTIFIER ,
73
- 'R' : STATE_IDENTIFIER ,
74
- 'S' : STATE_IDENTIFIER ,
75
- 'T' : STATE_IDENTIFIER ,
76
- 'U' : STATE_IDENTIFIER ,
77
- 'V' : STATE_IDENTIFIER ,
78
- 'W' : STATE_IDENTIFIER ,
79
- 'X' : STATE_IDENTIFIER ,
80
- 'Y' : STATE_IDENTIFIER ,
81
- 'Z' : STATE_IDENTIFIER ,
82
- 'a' : STATE_IDENTIFIER ,
83
- 'b' : STATE_IDENTIFIER ,
84
- 'c' : STATE_IDENTIFIER ,
85
- 'd' : STATE_IDENTIFIER ,
86
- 'e' : STATE_IDENTIFIER ,
87
- 'f' : STATE_IDENTIFIER ,
88
- 'g' : STATE_IDENTIFIER ,
89
- 'h' : STATE_IDENTIFIER ,
90
- 'i' : STATE_IDENTIFIER ,
91
- 'j' : STATE_IDENTIFIER ,
92
- 'k' : STATE_IDENTIFIER ,
93
- 'l' : STATE_IDENTIFIER ,
94
- 'm' : STATE_IDENTIFIER ,
95
- 'n' : STATE_IDENTIFIER ,
96
- 'o' : STATE_IDENTIFIER ,
97
- 'p' : STATE_IDENTIFIER ,
98
- 'q' : STATE_IDENTIFIER ,
99
- 'r' : STATE_IDENTIFIER ,
100
- 's' : STATE_IDENTIFIER ,
101
- 't' : STATE_IDENTIFIER ,
102
- 'u' : STATE_IDENTIFIER ,
103
- 'v' : STATE_IDENTIFIER ,
104
- 'w' : STATE_IDENTIFIER ,
105
- 'x' : STATE_IDENTIFIER ,
106
- 'y' : STATE_IDENTIFIER ,
107
- 'z' : STATE_IDENTIFIER ,
108
- ' ' : STATE_WHITESPACE ,
109
- "\t " : STATE_WHITESPACE ,
110
- "\n " : STATE_WHITESPACE ,
111
- "\r " : STATE_WHITESPACE
112
- }
10
+ START_NUMBER = set (string .digits )
11
+ VALID_NUMBER = set (string .digits )
12
+ WHITESPACE = set (" \t \n \r " )
113
13
SIMPLE_TOKENS = {
114
14
'.' : 'dot' ,
115
15
'*' : 'star' ,
@@ -166,29 +66,22 @@ class Lexer(object):
166
66
def tokenize (self , expression ):
167
67
scanner = Scanner (expression )
168
68
while scanner .current is not None :
169
- if not scanner .current in TRANSITION_TABLE :
170
- # The current char must be in the transition table to
171
- # be valid.
172
- yield {'type' : 'unknown' , 'value' : scanner .current ,
173
- 'start' : scanner .pos , 'end' : scanner .pos }
174
- scanner .next ()
175
- continue
176
- state = TRANSITION_TABLE [scanner .current ]
177
- if state == STATE_SINGLE_CHAR :
69
+
70
+ if scanner .current in SIMPLE_TOKENS :
178
71
yield {'type' : SIMPLE_TOKENS [scanner .current ],
179
72
'value' : scanner .current ,
180
73
'start' : scanner .pos , 'end' : scanner .pos }
181
74
scanner .next ()
182
- elif state == STATE_IDENTIFIER :
75
+ elif scanner . current in START_IDENTIFIER :
183
76
start = scanner .pos
184
77
buffer = scanner .current
185
78
while scanner .next () in VALID_IDENTIFIER :
186
79
buffer += scanner .current
187
80
yield {'type' : 'identifier' , 'value' : buffer ,
188
81
'start' : start , 'end' : len (buffer )}
189
- elif state == STATE_WHITESPACE :
82
+ elif scanner . current in WHITESPACE :
190
83
scanner .next ()
191
- elif state == STATE_LBRACKET :
84
+ elif scanner . current == '[' :
192
85
start = scanner .pos
193
86
next_char = scanner .next ()
194
87
if next_char == ']' :
@@ -202,29 +95,33 @@ def tokenize(self, expression):
202
95
else :
203
96
yield {'type' : 'lbracket' , 'value' : '[' ,
204
97
'start' : start , 'end' : start }
205
- elif state == STATE_STRING_LITERAL :
98
+ elif scanner . current == "'" :
206
99
yield self ._consume_raw_string_literal (scanner )
207
- elif state == STATE_PIPE :
100
+ elif scanner . current == '|' :
208
101
yield self ._match_or_else (scanner , '|' , 'or' , 'pipe' )
209
- elif state == STATE_JSON_LITERAL :
102
+ elif scanner . current == '`' :
210
103
yield self ._consume_literal (scanner )
211
- elif state == STATE_NUMBER :
104
+ elif scanner . current in START_NUMBER :
212
105
start = scanner .pos
213
106
buffer = scanner .current
214
107
while scanner .next () in VALID_NUMBER :
215
108
buffer += scanner .current
216
109
yield {'type' : 'number' , 'value' : int (buffer ),
217
110
'start' : start , 'end' : len (buffer )}
218
- elif state == STATE_QUOTED_STRING :
111
+ elif scanner . current == '"' :
219
112
yield self ._consume_quoted_identifier (scanner )
220
- elif state == STATE_LT :
113
+ elif scanner . current == '<' :
221
114
yield self ._match_or_else (scanner , '=' , 'lte' , 'lt' )
222
- elif state == STATE_GT :
115
+ elif scanner . current == '>' :
223
116
yield self ._match_or_else (scanner , '=' , 'gte' , 'gt' )
224
- elif state == STATE_EQ :
225
- yield self ._match_or_else (scanner , '=' , 'eq' , 'unknown' )
226
- elif state == STATE_NOT :
117
+ elif scanner .current == '!' :
227
118
yield self ._match_or_else (scanner , '=' , 'ne' , 'unknown' )
119
+ elif scanner .current == '=' :
120
+ yield self ._match_or_else (scanner , '=' , 'eq' , 'unknown' )
121
+ else :
122
+ yield {'type' : 'unknown' , 'value' : scanner .current ,
123
+ 'start' : scanner .pos , 'end' : scanner .pos }
124
+ scanner .next ()
228
125
yield {'type' : 'eof' , 'value' : '' ,
229
126
'start' : len (expression ), 'end' : len (expression )}
230
127
0 commit comments