Skip to content

Commit 4c87771

Browse files
committed
Lexer bug fixes
1 parent 9db1408 commit 4c87771

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

jmespath/lexer.py

Lines changed: 9 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
START_IDENTIFIER = set(string.ascii_letters + '_')
99
VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
10-
START_NUMBER = set(string.digits)
10+
START_NUMBER = set(string.digits + '-')
1111
VALID_NUMBER = set(string.digits)
1212
WHITESPACE = set(" \t\n\r")
1313
SIMPLE_TOKENS = {
@@ -43,7 +43,7 @@ def next(self):
4343
self.current = self.chars[self.pos]
4444
return self.current
4545

46-
def in_delimter(self, delimiter):
46+
def in_delimiter(self, delimiter):
4747
start = self.pos
4848
buffer = ''
4949
self.next()
@@ -66,7 +66,6 @@ class Lexer(object):
6666
def tokenize(self, expression):
6767
scanner = Scanner(expression)
6868
while scanner.current is not None:
69-
7069
if scanner.current in SIMPLE_TOKENS:
7170
yield {'type': SIMPLE_TOKENS[scanner.current],
7271
'value': scanner.current,
@@ -77,7 +76,7 @@ def tokenize(self, expression):
7776
buffer = scanner.current
7877
while scanner.next() in VALID_IDENTIFIER:
7978
buffer += scanner.current
80-
yield {'type': 'identifier', 'value': buffer,
79+
yield {'type': 'unquoted_identifier', 'value': buffer,
8180
'start': start, 'end': len(buffer)}
8281
elif scanner.current in WHITESPACE:
8382
scanner.next()
@@ -127,29 +126,29 @@ def tokenize(self, expression):
127126

128127
def _consume_literal(self, scanner):
129128
start = scanner.pos
130-
lexeme = scanner.in_delimter('`')
129+
lexeme = scanner.in_delimiter('`')
131130
try:
132131
# Assume it is valid JSON and attempt to parse.
133132
parsed_json = loads(lexeme)
134133
except ValueError:
135134
try:
136135
# Invalid JSON values should be converted to quoted
137136
# JSON strings during the JEP-12 deprecation period.
138-
parsed_json = loads('"%s"' % lexeme)
137+
parsed_json = loads('"%s"' % lexeme.lstrip())
139138
warnings.warn("deprecated string literal syntax",
140139
PendingDeprecationWarning)
141140
except ValueError:
142141
raise LexerError(lexer_position=start,
143142
lexer_value=lexeme,
144-
message="Bad token %s" % value)
143+
message="Bad token %s" % lexeme)
145144
return {'type': 'literal', 'value': parsed_json,
146145
'start': start, 'end': len(lexeme)}
147146

148147
def _consume_quoted_identifier(self, scanner):
149148
start = scanner.pos
150-
lexeme = scanner.in_delimter('"')
149+
lexeme = scanner.in_delimiter('"')
151150
try:
152-
return {'type': 'identifier', 'value': loads(lexeme),
151+
return {'type': 'quoted_identifier', 'value': loads(lexeme),
153152
'start': start, 'end': len(lexeme)}
154153
except ValueError as e:
155154
error_message = str(e).split(':')[0]
@@ -159,7 +158,7 @@ def _consume_quoted_identifier(self, scanner):
159158

160159
def _consume_raw_string_literal(self, scanner):
161160
start = scanner.pos
162-
lexeme = scanner.in_delimter("'")
161+
lexeme = scanner.in_delimiter("'")
163162
return {'type': 'literal', 'value': lexeme,
164163
'start': start, 'end': len(lexeme)}
165164

0 commit comments

Comments
 (0)