Skip to content

Commit 9db1408

Browse files
committed
Removing the computed table is just as fast but less code
1 parent 64fba23 commit 9db1408

File tree

1 file changed

+23
-126
lines changed

1 file changed

+23
-126
lines changed

jmespath/lexer.py

Lines changed: 23 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -5,111 +5,11 @@
55
from jmespath.exceptions import LexerError, EmptyExpressionError
66

77

8-
VALID_NUMBER = set(string.digits)
8+
START_IDENTIFIER = set(string.ascii_letters + '_')
99
VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
10-
STATE_IDENTIFIER = 0;
11-
STATE_NUMBER = 1;
12-
STATE_SINGLE_CHAR = 2;
13-
STATE_WHITESPACE = 3;
14-
STATE_STRING_LITERAL = 4;
15-
STATE_QUOTED_STRING = 5;
16-
STATE_JSON_LITERAL = 6;
17-
STATE_LBRACKET = 7;
18-
STATE_PIPE = 8;
19-
STATE_LT = 9;
20-
STATE_GT = 10;
21-
STATE_EQ = 11;
22-
STATE_NOT = 12;
23-
TRANSITION_TABLE = {
24-
'<': STATE_LT,
25-
'>': STATE_GT,
26-
'=': STATE_EQ,
27-
'!': STATE_NOT,
28-
'[': STATE_LBRACKET,
29-
'|': STATE_PIPE,
30-
'`': STATE_JSON_LITERAL,
31-
'"': STATE_QUOTED_STRING,
32-
"'": STATE_STRING_LITERAL,
33-
'-': STATE_NUMBER,
34-
'0': STATE_NUMBER,
35-
'1': STATE_NUMBER,
36-
'2': STATE_NUMBER,
37-
'3': STATE_NUMBER,
38-
'4': STATE_NUMBER,
39-
'5': STATE_NUMBER,
40-
'6': STATE_NUMBER,
41-
'7': STATE_NUMBER,
42-
'8': STATE_NUMBER,
43-
'9': STATE_NUMBER,
44-
'.': STATE_SINGLE_CHAR,
45-
'*': STATE_SINGLE_CHAR,
46-
']': STATE_SINGLE_CHAR,
47-
',': STATE_SINGLE_CHAR,
48-
':': STATE_SINGLE_CHAR,
49-
'@': STATE_SINGLE_CHAR,
50-
'&': STATE_SINGLE_CHAR,
51-
'(': STATE_SINGLE_CHAR,
52-
')': STATE_SINGLE_CHAR,
53-
'{': STATE_SINGLE_CHAR,
54-
'}': STATE_SINGLE_CHAR,
55-
'_': STATE_IDENTIFIER,
56-
'A': STATE_IDENTIFIER,
57-
'B': STATE_IDENTIFIER,
58-
'C': STATE_IDENTIFIER,
59-
'D': STATE_IDENTIFIER,
60-
'E': STATE_IDENTIFIER,
61-
'F': STATE_IDENTIFIER,
62-
'G': STATE_IDENTIFIER,
63-
'H': STATE_IDENTIFIER,
64-
'I': STATE_IDENTIFIER,
65-
'J': STATE_IDENTIFIER,
66-
'K': STATE_IDENTIFIER,
67-
'L': STATE_IDENTIFIER,
68-
'M': STATE_IDENTIFIER,
69-
'N': STATE_IDENTIFIER,
70-
'O': STATE_IDENTIFIER,
71-
'P': STATE_IDENTIFIER,
72-
'Q': STATE_IDENTIFIER,
73-
'R': STATE_IDENTIFIER,
74-
'S': STATE_IDENTIFIER,
75-
'T': STATE_IDENTIFIER,
76-
'U': STATE_IDENTIFIER,
77-
'V': STATE_IDENTIFIER,
78-
'W': STATE_IDENTIFIER,
79-
'X': STATE_IDENTIFIER,
80-
'Y': STATE_IDENTIFIER,
81-
'Z': STATE_IDENTIFIER,
82-
'a': STATE_IDENTIFIER,
83-
'b': STATE_IDENTIFIER,
84-
'c': STATE_IDENTIFIER,
85-
'd': STATE_IDENTIFIER,
86-
'e': STATE_IDENTIFIER,
87-
'f': STATE_IDENTIFIER,
88-
'g': STATE_IDENTIFIER,
89-
'h': STATE_IDENTIFIER,
90-
'i': STATE_IDENTIFIER,
91-
'j': STATE_IDENTIFIER,
92-
'k': STATE_IDENTIFIER,
93-
'l': STATE_IDENTIFIER,
94-
'm': STATE_IDENTIFIER,
95-
'n': STATE_IDENTIFIER,
96-
'o': STATE_IDENTIFIER,
97-
'p': STATE_IDENTIFIER,
98-
'q': STATE_IDENTIFIER,
99-
'r': STATE_IDENTIFIER,
100-
's': STATE_IDENTIFIER,
101-
't': STATE_IDENTIFIER,
102-
'u': STATE_IDENTIFIER,
103-
'v': STATE_IDENTIFIER,
104-
'w': STATE_IDENTIFIER,
105-
'x': STATE_IDENTIFIER,
106-
'y': STATE_IDENTIFIER,
107-
'z': STATE_IDENTIFIER,
108-
' ': STATE_WHITESPACE,
109-
"\t": STATE_WHITESPACE,
110-
"\n": STATE_WHITESPACE,
111-
"\r": STATE_WHITESPACE
112-
}
10+
START_NUMBER = set(string.digits)
11+
VALID_NUMBER = set(string.digits)
12+
WHITESPACE = set(" \t\n\r")
11313
SIMPLE_TOKENS = {
11414
'.': 'dot',
11515
'*': 'star',
@@ -166,29 +66,22 @@ class Lexer(object):
16666
def tokenize(self, expression):
16767
scanner = Scanner(expression)
16868
while scanner.current is not None:
169-
if not scanner.current in TRANSITION_TABLE:
170-
# The current char must be in the transition table to
171-
# be valid.
172-
yield {'type': 'unknown', 'value': scanner.current,
173-
'start': scanner.pos, 'end': scanner.pos}
174-
scanner.next()
175-
continue
176-
state = TRANSITION_TABLE[scanner.current]
177-
if state == STATE_SINGLE_CHAR:
69+
70+
if scanner.current in SIMPLE_TOKENS:
17871
yield {'type': SIMPLE_TOKENS[scanner.current],
17972
'value': scanner.current,
18073
'start': scanner.pos, 'end': scanner.pos}
18174
scanner.next()
182-
elif state == STATE_IDENTIFIER:
75+
elif scanner.current in START_IDENTIFIER:
18376
start = scanner.pos
18477
buffer = scanner.current
18578
while scanner.next() in VALID_IDENTIFIER:
18679
buffer += scanner.current
18780
yield {'type': 'identifier', 'value': buffer,
18881
'start': start, 'end': len(buffer)}
189-
elif state == STATE_WHITESPACE:
82+
elif scanner.current in WHITESPACE:
19083
scanner.next()
191-
elif state == STATE_LBRACKET:
84+
elif scanner.current == '[':
19285
start = scanner.pos
19386
next_char = scanner.next()
19487
if next_char == ']':
@@ -202,29 +95,33 @@ def tokenize(self, expression):
20295
else:
20396
yield {'type': 'lbracket', 'value': '[',
20497
'start': start, 'end': start}
205-
elif state == STATE_STRING_LITERAL:
98+
elif scanner.current == "'":
20699
yield self._consume_raw_string_literal(scanner)
207-
elif state == STATE_PIPE:
100+
elif scanner.current == '|':
208101
yield self._match_or_else(scanner, '|', 'or', 'pipe')
209-
elif state == STATE_JSON_LITERAL:
102+
elif scanner.current == '`':
210103
yield self._consume_literal(scanner)
211-
elif state == STATE_NUMBER:
104+
elif scanner.current in START_NUMBER:
212105
start = scanner.pos
213106
buffer = scanner.current
214107
while scanner.next() in VALID_NUMBER:
215108
buffer += scanner.current
216109
yield {'type': 'number', 'value': int(buffer),
217110
'start': start, 'end': len(buffer)}
218-
elif state == STATE_QUOTED_STRING:
111+
elif scanner.current == '"':
219112
yield self._consume_quoted_identifier(scanner)
220-
elif state == STATE_LT:
113+
elif scanner.current == '<':
221114
yield self._match_or_else(scanner, '=', 'lte', 'lt')
222-
elif state == STATE_GT:
115+
elif scanner.current == '>':
223116
yield self._match_or_else(scanner, '=', 'gte', 'gt')
224-
elif state == STATE_EQ:
225-
yield self._match_or_else(scanner, '=', 'eq', 'unknown')
226-
elif state == STATE_NOT:
117+
elif scanner.current == '!':
227118
yield self._match_or_else(scanner, '=', 'ne', 'unknown')
119+
elif scanner.current == '=':
120+
yield self._match_or_else(scanner, '=', 'eq', 'unknown')
121+
else:
122+
yield {'type': 'unknown', 'value': scanner.current,
123+
'start': scanner.pos, 'end': scanner.pos}
124+
scanner.next()
228125
yield {'type': 'eof', 'value': '',
229126
'start': len(expression), 'end': len(expression)}
230127

0 commit comments

Comments
 (0)