Raise LexerError on invalid numbers

jamesls · jamesls · commit cefe47c52c33 · 2015-10-20T21:30:30.000-07:00
Fixes #98.
diff --git a/jmespath/lexer.py b/jmespath/lexer.py
@@ -8,7 +8,6 @@
 class Lexer(object):
     START_IDENTIFIER = set(string.ascii_letters + '_')
     VALID_IDENTIFIER = set(string.ascii_letters + string.digits + '_')
-    START_NUMBER = set(string.digits + '-')
     VALID_NUMBER = set(string.digits)
     WHITESPACE = set(" \t\n\r")
     SIMPLE_TOKENS = {
@@ -62,13 +61,22 @@ def tokenize(self, expression):
                 yield self._match_or_else('|', 'or', 'pipe')
             elif self._current == '`':
                 yield self._consume_literal()
-            elif self._current in self.START_NUMBER:
+            elif self._current in self.VALID_NUMBER:
                 start = self._position
-                buff = self._current
-                while self._next() in self.VALID_NUMBER:
-                    buff += self._current
+                buff = self._consume_number()
                 yield {'type': 'number', 'value': int(buff),
                        'start': start, 'end': start + len(buff)}
+            elif self._current == '-':
+                # Negative number.
+                start = self._position
+                buff = self._consume_number()
+                if len(buff) > 1:
+                    yield {'type': 'number', 'value': int(buff),
+                           'start': start, 'end': start + len(buff)}
+                else:
+                    raise LexerError(lexer_position=start,
+                                     lexer_value=buff,
+                                     message="Unknown token '%s'" % buff)
             elif self._current == '"':
                 yield self._consume_quoted_identifier()
             elif self._current == '<':
@@ -86,6 +94,13 @@ def tokenize(self, expression):
         yield {'type': 'eof', 'value': '',
                'start': self._length, 'end': self._length}
 
+    def _consume_number(self):
+        """Return the current char ('-' or digit) plus all digits after it."""
+        buff = self._current
+        while self._next() in self.VALID_NUMBER:
+            buff += self._current
+        return buff
+
     def _initialize_for_expression(self, expression):
         if not expression:
             raise EmptyExpressionError()
diff --git a/tests/compliance/syntax.json b/tests/compliance/syntax.json
@@ -95,6 +95,10 @@
       {
         "expression": "a][",
         "error": "syntax"
+      },
+      {
+        "expression": "foo-bar",
+        "error": "syntax"
       }
     ]
   },
diff --git a/tests/test_lexer.py b/tests/test_lexer.py
@@ -144,13 +144,17 @@ def test_adds_quotes_when_invalid_json(self):
         )
 
     def test_unknown_character(self):
-        with self.assertRaises(LexerError):
+        with self.assertRaises(LexerError) as e:
             tokens = list(self.lexer.tokenize('foo[0^]'))
 
     def test_bad_first_character(self):
         with self.assertRaises(LexerError):
             tokens = list(self.lexer.tokenize('^foo[0]'))
 
+    def test_unknown_character_with_identifier(self):
+        with self.assertRaisesRegexp(LexerError, "Unknown token"):
+            list(self.lexer.tokenize('foo-bar'))
+
 
 if __name__ == '__main__':
     unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -95,6 +95,10 @@`
`95`	`95`	`{`
`96`	`96`	`"expression": "a][",`
`97`	`97`	`"error": "syntax"`
	`98`	`+ },`
	`99`	`+ {`
	`100`	`+ "expression": "foo-bar",`
	`101`	`+ "error": "syntax"`
`98`	`102`	`}`
`99`	`103`	`]`
`100`	`104`	`},`