8
8
class Lexer (object ):
9
9
START_IDENTIFIER = set (string .ascii_letters + '_' )
10
10
VALID_IDENTIFIER = set (string .ascii_letters + string .digits + '_' )
11
- START_NUMBER = set (string .digits + '-' )
12
11
VALID_NUMBER = set (string .digits )
13
12
WHITESPACE = set (" \t \n \r " )
14
13
SIMPLE_TOKENS = {
@@ -62,13 +61,22 @@ def tokenize(self, expression):
62
61
yield self ._match_or_else ('|' , 'or' , 'pipe' )
63
62
elif self ._current == '`' :
64
63
yield self ._consume_literal ()
65
- elif self ._current in self .START_NUMBER :
64
+ elif self ._current in self .VALID_NUMBER :
66
65
start = self ._position
67
- buff = self ._current
68
- while self ._next () in self .VALID_NUMBER :
69
- buff += self ._current
66
+ buff = self ._consume_number ()
70
67
yield {'type' : 'number' , 'value' : int (buff ),
71
68
'start' : start , 'end' : start + len (buff )}
69
+ elif self ._current == '-' :
70
+ # Negative number.
71
+ start = self ._position
72
+ buff = self ._consume_number ()
73
+ if len (buff ) > 1 :
74
+ yield {'type' : 'number' , 'value' : int (buff ),
75
+ 'start' : start , 'end' : start + len (buff )}
76
+ else :
77
+ raise LexerError (lexer_position = start ,
78
+ lexer_value = buff ,
79
+ message = "Unknown token '%s'" % buff )
72
80
elif self ._current == '"' :
73
81
yield self ._consume_quoted_identifier ()
74
82
elif self ._current == '<' :
@@ -86,6 +94,13 @@ def tokenize(self, expression):
86
94
yield {'type' : 'eof' , 'value' : '' ,
87
95
'start' : self ._length , 'end' : self ._length }
88
96
97
def _consume_number(self):
    """Collect the current character plus any digits that follow it.

    The current character is taken unconditionally; the visible callers
    invoke this only when it is a digit or a leading '-'. The lexer is
    then advanced with ``self._next()`` for as long as the value it
    returns is in ``VALID_NUMBER``.

    Returns:
        The collected characters as a string. When called on a lone '-'
        the result is just '-', so callers must check the length before
        passing it to ``int()``.
    """
    # NOTE(review): assumes self._next() advances the lexer, updates
    # self._current, and returns the new current character -- confirm
    # against its definition.
    chars = [self._current]
    while self._next() in self.VALID_NUMBER:
        chars.append(self._current)
    return ''.join(chars)
103
+
89
104
def _initialize_for_expression (self , expression ):
90
105
if not expression :
91
106
raise EmptyExpressionError ()
0 commit comments