Commit eb7d3d6

Fix lexer error values
Updated the hypothesis tests to check additional properties of lexer errors being raised. This caught a few more issues that I've fixed here, primarily related to the lexer position and the token value that triggered the error.
Parent: 71f4485
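
To make the fixed property concrete: the updated test asserts that whenever a LexerError is raised, its lex_position indexes the exact character in the expression where the offending token_value begins. A minimal sketch of that invariant, assuming a jmespath checkout that includes this commit (the sample expression is my own illustration, not from the commit):

from jmespath import exceptions, lexer

expr = 'foo=bar'  # a lone '=' is not a valid token; only '==' is
try:
    list(lexer.Lexer().tokenize(expr))
except exceptions.LexerError as e:
    # The error must point at the offending '=' character.
    assert expr[e.lex_position] == e.token_value[0]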

2 files changed, 33 insertions(+), 5 deletions(-)

extra/test_hypothesis.py

Lines changed: 21 additions & 1 deletion
@@ -40,11 +40,31 @@
 def test_lexer_api(expr):
     try:
         tokens = list(lexer.Lexer().tokenize(expr))
-    except exceptions.JMESPathError as e:
+    except exceptions.EmptyExpressionError:
+        return
+    except exceptions.LexerError as e:
+        assert e.lex_position >= 0, e.lex_position
+        assert e.lex_position < len(expr), e.lex_position
+        if expr:
+            assert expr[e.lex_position] == e.token_value[0], (
+                "Lex position does not match first token char.\n"
+                "Expression: %s\n%s != %s" % (expr, expr[e.lex_position],
+                                              e.token_value[0])
+            )
         return
     except Exception as e:
         raise AssertionError("Non JMESPathError raised: %s" % e)
     assert isinstance(tokens, list)
+    # Token starting positions must be unique, can't have two
+    # tokens with the same start position.
+    start_locations = [t['start'] for t in tokens]
+    assert len(set(start_locations)) == len(start_locations), (
+        "Tokens must have unique starting locations.")
+    # Starting positions must be increasing (i.e sorted).
+    assert sorted(start_locations) == start_locations, (
+        "Tokens must have increasing start locations.")
+    # Last token is always EOF.
+    assert tokens[-1]['type'] == 'eof'
 
 
 @settings(**BASE_SETTINGS)
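
The new assertions on the success path can also be exercised directly: token start offsets must be unique and increasing, and the stream must end with an eof token. A minimal sketch under the same assumptions, using an arbitrary valid expression:

from jmespath import lexer

tokens = list(lexer.Lexer().tokenize('foo.bar[0]'))
starts = [t['start'] for t in tokens]
assert len(set(starts)) == len(starts)  # start offsets are unique
assert starts == sorted(starts)         # and increasing
assert tokens[-1]['type'] == 'eof'      # last token is always EOF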

jmespath/lexer.py

Lines changed: 12 additions & 4 deletions
@@ -92,10 +92,17 @@ def tokenize(self, expression):
                            'start': self._position - 1, 'end': self._position}
                     self._next()
                 else:
+                    if self._current is None:
+                        # If we're at the EOF, we never advanced
+                        # the position so we don't need to rewind
+                        # it back one location.
+                        position = self._position
+                    else:
+                        position = self._position - 1
                     raise LexerError(
-                        lexer_position=self._position - 1,
+                        lexer_position=position,
                         lexer_value='=',
-                        message="Unknown token =")
+                        message="Unknown token '='")
             else:
                 raise LexerError(lexer_position=self._position,
                                  lexer_value=self._current,
@@ -138,8 +145,9 @@ def _consume_until(self, delimiter):
                 buff += '\\'
                 self._next()
             if self._current is None:
+                # We're at the EOF.
                 raise LexerError(lexer_position=start,
-                                 lexer_value=self._expression,
+                                 lexer_value=self._expression[start:],
                                  message="Unclosed %s delimiter" % delimiter)
             buff += self._current
             self._next()
@@ -162,7 +170,7 @@ def _consume_literal(self):
                               PendingDeprecationWarning)
             except ValueError:
                 raise LexerError(lexer_position=start,
-                                 lexer_value=self._expression,
+                                 lexer_value=self._expression[start:],
                                  message="Bad token %s" % lexeme)
         token_len = self._position - start
         return {'type': 'literal', 'value': parsed_json,
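
Both behavior changes are observable from the public API. A minimal sketch under the same assumptions; the expressions and expected values below are my own illustrations derived from the diff:

from jmespath import exceptions, lexer

# Trailing '=' at EOF: the lexer never advanced past it, so the
# position is reported as-is rather than rewound by one.
try:
    list(lexer.Lexer().tokenize('foo='))
except exceptions.LexerError as e:
    assert e.token_value == '='
    assert e.lex_position == 3  # indexes the '=' itself

# Unclosed delimiter: lexer_value is now the unterminated tail of the
# expression ('`bar'), not the entire expression.
try:
    list(lexer.Lexer().tokenize('foo.`bar'))
except exceptions.LexerError as e:
    assert e.token_value == '`bar'
    assert e.lex_position == 4  # indexes the opening backtick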
