Skip to content

Commit 6debb36

Browse files
committed
[lexer] Support arithmetic operators
1 parent bbe7300 commit 6debb36

File tree

2 files changed

+91
-12
lines changed

2 files changed

+91
-12
lines changed

jmespath/lexer.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,16 @@ class Lexer(object):
2121
')': 'rparen',
2222
'{': 'lbrace',
2323
'}': 'rbrace',
24+
'+': 'plus',
25+
'%': 'modulo',
26+
u'\u2212': 'minus',
27+
u'\u00d7': 'multiply',
28+
u'\u00f7': 'divide',
2429
}
2530

2631
def tokenize(self, expression):
2732
self._initialize_for_expression(expression)
33+
print(self._current in self.SIMPLE_TOKENS)
2834
while self._current is not None:
2935
if self._current in self.SIMPLE_TOKENS:
3036
yield {'type': self.SIMPLE_TOKENS[self._current],
@@ -68,16 +74,30 @@ def tokenize(self, expression):
6874
yield {'type': 'number', 'value': int(buff),
6975
'start': start, 'end': start + len(buff)}
7076
elif self._current == '-':
71-
# Negative number.
72-
start = self._position
73-
buff = self._consume_number()
74-
if len(buff) > 1:
75-
yield {'type': 'number', 'value': int(buff),
76-
'start': start, 'end': start + len(buff)}
77+
if not self._peek_is_next_digit():
78+
self._next()
79+
yield {'type': 'minus', 'value': '-',
80+
'start': self._position - 1, 'end': self._position}
81+
else:
82+
# Negative number.
83+
start = self._position
84+
buff = self._consume_number()
85+
if len(buff) > 1:
86+
yield {'type': 'number', 'value': int(buff),
87+
'start': start, 'end': start + len(buff)}
88+
else:
89+
raise LexerError(lexer_position=start,
90+
lexer_value=buff,
91+
message="Unknown token '%s'" % buff)
92+
elif self._current == '/':
93+
self._next()
94+
if self._current == '/':
95+
self._next()
96+
yield {'type': 'div', 'value': '//',
97+
'start': self._position - 1, 'end': self._position}
7798
else:
78-
raise LexerError(lexer_position=start,
79-
lexer_value=buff,
80-
message="Unknown token '%s'" % buff)
99+
yield {'type': 'divide', 'value': '/',
100+
'start': self._position, 'end': self._position + 1}
81101
elif self._current == '"':
82102
yield self._consume_quoted_identifier()
83103
elif self._current == '<':
@@ -117,6 +137,13 @@ def _consume_number(self):
117137
buff += self._current
118138
return buff
119139

140+
def _peek_is_next_digit(self):
141+
if (self._position == self._length - 1):
142+
return False
143+
else:
144+
next = self._chars[self._position + 1]
145+
return next in self.VALID_NUMBER
146+
120147
def _initialize_for_expression(self, expression):
121148
if not expression:
122149
raise EmptyExpressionError()

tests/test_lexer.py

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,50 @@ def test_negative_number(self):
4545
self.assert_tokens(tokens, [{'type': 'number',
4646
'value': -24}])
4747

48+
def test_plus(self):
    """Lexing '+' yields a single 'plus' token."""
    result = list(self.lexer.tokenize('+'))
    expected = [{'type': 'plus', 'value': '+'}]
    self.assert_tokens(result, expected)
52+
53+
def test_minus(self):
    """Lexing '-' yields a single 'minus' token."""
    result = list(self.lexer.tokenize('-'))
    expected = [{'type': 'minus', 'value': '-'}]
    self.assert_tokens(result, expected)
57+
def test_minus_unicode(self):
    """Lexing the Unicode minus sign (U+2212) yields a 'minus' token."""
    result = list(self.lexer.tokenize(u'\u2212'))
    expected = [{'type': 'minus', 'value': u'\u2212'}]
    self.assert_tokens(result, expected)
61+
62+
def test_multiplication(self):
    """Lexing '*' yields a 'star' token (doubles as the wildcard)."""
    result = list(self.lexer.tokenize('*'))
    expected = [{'type': 'star', 'value': '*'}]
    self.assert_tokens(result, expected)
66+
67+
def test_multiplication_unicode(self):
    """Lexing the Unicode multiply sign (U+00D7) yields a 'multiply' token."""
    result = list(self.lexer.tokenize(u'\u00d7'))
    expected = [{'type': 'multiply', 'value': u'\u00d7'}]
    self.assert_tokens(result, expected)
71+
72+
def test_division(self):
    """Lexing '/' yields a single 'divide' token."""
    result = list(self.lexer.tokenize('/'))
    expected = [{'type': 'divide', 'value': '/'}]
    self.assert_tokens(result, expected)
76+
77+
def test_division_unicode(self):
    """Lexing the Unicode division sign (U+00F7) yields a 'divide' token."""
    result = list(self.lexer.tokenize('÷'))
    expected = [{'type': 'divide', 'value': '÷'}]
    self.assert_tokens(result, expected)
81+
82+
def test_modulo(self):
    """Lexing '%' yields a single 'modulo' token."""
    result = list(self.lexer.tokenize('%'))
    expected = [{'type': 'modulo', 'value': '%'}]
    self.assert_tokens(result, expected)
86+
87+
def test_integer_division(self):
    """Lexing '//' yields a single 'div' (integer division) token."""
    result = list(self.lexer.tokenize('//'))
    expected = [{'type': 'div', 'value': '//'}]
    self.assert_tokens(result, expected)
91+
4892
def test_quoted_identifier(self):
4993
tokens = list(self.lexer.tokenize('"foobar"'))
5094
self.assert_tokens(tokens, [{'type': 'quoted_identifier',
@@ -151,9 +195,17 @@ def test_bad_first_character(self):
151195
with self.assertRaises(LexerError):
152196
tokens = list(self.lexer.tokenize('^foo[0]'))
153197

154-
def test_unknown_character_with_identifier(self):
155-
with self.assertRaisesRegex(LexerError, "Unknown token"):
156-
list(self.lexer.tokenize('foo-bar'))
198+
def test_arithmetic_expression(self):
    """'foo-bar' lexes as identifier, minus operator, identifier, eof.

    Replaces the old expectation that '-' after an identifier was an
    unknown-token error: it is now a binary minus operator.
    """
    expected = [
        {'type': 'unquoted_identifier', 'value': 'foo', 'start': 0, 'end': 3},
        {'type': 'minus', 'value': '-', 'start': 3, 'end': 4},
        {'type': 'unquoted_identifier', 'value': 'bar', 'start': 4, 'end': 7},
        {'type': 'eof', 'value': '', 'start': 7, 'end': 7},
    ]
    self.assertEqual(list(self.lexer.tokenize('foo-bar')), expected)
157209

158210

159211
if __name__ == '__main__':

0 commit comments

Comments
 (0)