Skip to content

Commit cefe47c

Browse files
committed
Raise LexerError on invalid numbers
Fixes #98.
1 parent 9302489 commit cefe47c

File tree

3 files changed

+29
-6
lines changed

3 files changed

+29
-6
lines changed

jmespath/lexer.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
class Lexer(object):
99
START_IDENTIFIER = set(string.ascii_letters + '_')
1010
VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
11-
START_NUMBER = set(string.digits + '-')
1211
VALID_NUMBER = set(string.digits)
1312
WHITESPACE = set(" \t\n\r")
1413
SIMPLE_TOKENS = {
@@ -62,13 +61,22 @@ def tokenize(self, expression):
6261
yield self._match_or_else('|', 'or', 'pipe')
6362
elif self._current == '`':
6463
yield self._consume_literal()
65-
elif self._current in self.START_NUMBER:
64+
elif self._current in self.VALID_NUMBER:
6665
start = self._position
67-
buff = self._current
68-
while self._next() in self.VALID_NUMBER:
69-
buff += self._current
66+
buff = self._consume_number()
7067
yield {'type': 'number', 'value': int(buff),
7168
'start': start, 'end': start + len(buff)}
69+
elif self._current == '-':
70+
# Negative number.
71+
start = self._position
72+
buff = self._consume_number()
73+
if len(buff) > 1:
74+
yield {'type': 'number', 'value': int(buff),
75+
'start': start, 'end': start + len(buff)}
76+
else:
77+
raise LexerError(lexer_position=start,
78+
lexer_value=buff,
79+
message="Unknown token '%s'" % buff)
7280
elif self._current == '"':
7381
yield self._consume_quoted_identifier()
7482
elif self._current == '<':
@@ -86,6 +94,13 @@ def tokenize(self, expression):
8694
yield {'type': 'eof', 'value': '',
8795
'start': self._length, 'end': self._length}
8896

97+
def _consume_number(self):
98+
start = self._position
99+
buff = self._current
100+
while self._next() in self.VALID_NUMBER:
101+
buff += self._current
102+
return buff
103+
89104
def _initialize_for_expression(self, expression):
90105
if not expression:
91106
raise EmptyExpressionError()

tests/compliance/syntax.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@
9595
{
9696
"expression": "a][",
9797
"error": "syntax"
98+
},
99+
{
100+
"expression": "foo-bar",
101+
"error": "syntax"
98102
}
99103
]
100104
},

tests/test_lexer.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -144,13 +144,17 @@ def test_adds_quotes_when_invalid_json(self):
144144
)
145145

146146
def test_unknown_character(self):
147-
with self.assertRaises(LexerError):
147+
with self.assertRaises(LexerError) as e:
148148
tokens = list(self.lexer.tokenize('foo[0^]'))
149149

150150
def test_bad_first_character(self):
151151
with self.assertRaises(LexerError):
152152
tokens = list(self.lexer.tokenize('^foo[0]'))
153153

154+
def test_unknown_character_with_identifier(self):
155+
with self.assertRaisesRegexp(LexerError, "Unknown token"):
156+
list(self.lexer.tokenize('foo-bar'))
157+
154158

155159
if __name__ == '__main__':
156160
unittest.main()

0 commit comments

Comments
 (0)