Skip to content

Commit e0d9022

Browse files
refactor(tokenizer): Extract NumberParser into own file
1 parent 07b0fd5 commit e0d9022

File tree

2 files changed

+81
-73
lines changed

2 files changed

+81
-73
lines changed

parser/lexer/number_parser.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from __future__ import annotations
2+
3+
from string import digits
4+
5+
from parser.common import StrRegion
6+
from parser.lexer import LocatedMalformedNumberError
7+
from parser.lexer.src_handler import UsesSrc
8+
from parser.lexer.tokens import Token, NumberToken
9+
10+
11+
class NumberParser(UsesSrc):
    """Scan a numeric literal out of the source text.

    Shape accepted by the methods below:

    * an optional run of digits, then optionally a ``.`` followed by an
      optional run of digits (at least one of the two runs must be present);
    * optionally an exponent: ``e``/``E``, an optional ``-`` and a mandatory
      run of digits (no ``.`` is allowed in the exponent).

    Within a run of digits a single ``_`` may separate two digits; a leading,
    trailing or doubled ``_`` is reported as an error.

    Errors are created with ``self.err(message, index)`` and raised.
    """
    default_err_type = LocatedMalformedNumberError

    @staticmethod
    def _is_digit(ch) -> bool:
        """Return True only if ``ch`` is exactly one ASCII decimal digit."""
        # ``ch in digits`` alone is a *substring* test: ``'' in digits`` is
        # True, so an empty string would be treated as a digit and the scan
        # loop below would never terminate.  Whether ``self.get`` can return
        # '' is not visible from this class, so guard against it explicitly.
        return len(ch) == 1 and ch in digits

    # todo 0x, 0b (I refuse to add octal literals) - also hex floats???
    def _parse_digit_seq(self, start: int) -> int | None:
        """Consume a run of digits (with single ``_`` separators) at ``start``.

        Returns the index just past the run, or ``None`` if the character at
        ``start`` is not a digit.  Raises ``self.err(...)`` for a leading,
        trailing or doubled ``_``.
        """
        first = self.get(start)
        if first == '_':
            raise self.err("Can't have '_' at the start of a number", start)
        if not self._is_digit(first):
            return None
        idx = start + 1
        while True:
            ch = self.get(idx)
            if ch == '_':
                # A separator is only valid when directly followed by a digit.
                following = self.get(idx + 1)
                if self._is_digit(following):
                    idx += 2  # '_' and digit
                elif following == '_':
                    raise self.err(
                        "Can only have one consecutive '_' in a number",
                        idx + 1)
                else:
                    raise self.err(
                        "Can't have '_' at the end of a number", idx)
            elif self._is_digit(ch):
                idx += 1
            else:
                return idx  # end of digits/'_'

    def _parse_num_no_exp(self, idx: int) -> int:
        """Consume ``digits``, ``digits.``, ``.digits`` or ``digits.digits``.

        Returns the index just past the consumed text.  Raises
        ``self.err(...)`` if the text starts with neither a digit nor ``.``,
        or if it is a lone ``.`` with no digits on either side.
        """
        int_end = self._parse_digit_seq(idx)
        has_pre_dot = int_end is not None
        if has_pre_dot:
            idx = int_end
        elif self.get(idx) != '.':
            raise self.err("Number must start with digit or '.' ", idx)
        if self.get(idx) != '.':
            # eg: 1234, 567e-5, 8 +9-10
            return idx
        idx += 1  # step over the '.'
        frac_end = self._parse_digit_seq(idx)
        if frac_end is not None:
            return frac_end
        if has_pre_dot:
            # eg: "12." - digits before the dot are enough
            return idx
        raise self.err("Number cannot be a single '.' "
                       "(expected digits before or after)", idx)

    def _parse_number(self, idx: int) -> int:
        """Consume a full number, including an optional exponent part.

        Returns the index just past the number.  Raises ``self.err(...)``
        when ``e``/``E`` is not followed by (an optional ``-`` and) digits.
        """
        idx = self._parse_num_no_exp(idx)
        if self.get(idx).lower() != 'e':
            return idx
        idx += 1
        # need to handle '-' here explicitly as it is part of the number
        # so can't just be parsed as a separate operator
        # NOTE(review): an explicit '+' sign (eg: 1e+5) is not accepted and
        # ends up in the error below - confirm that this is intended.
        if self.get(idx) == '-':
            idx += 1
        exp_end = self._parse_digit_seq(idx)  # no dot after the 'e'
        if exp_end is None:
            # eg: 1.2eC, 8e-Q which is always an error
            raise self.err("Expected integer after <number>e", idx)
        return exp_end

    def parse(self, start: int) -> Token:
        """Return a ``NumberToken`` spanning the number that begins at ``start``."""
        return NumberToken(StrRegion(start, self._parse_number(start)))

parser/lexer/tokenizer.py

Lines changed: 2 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from string import ascii_letters, digits
66
from typing import IO
77

8-
from .errors import LocatedMalformedNumberError
8+
from .number_parser import NumberParser
99
from .src_handler import UsesSrc
1010
from .tokens import *
1111
from ..common import StrRegion
@@ -170,7 +170,7 @@ def _t_number(self, idx: int) -> int:
170170
# doesn't handle negative numbers,
171171
# those should be handled as a separate '-' operator
172172
assert self[idx] != '-', "_t_number doesn't handle negative numbers"
173-
return self.add_token(_IncrementalNumberParser(self.src).parse(idx))
173+
return self.add_token(NumberParser(self.src).parse(idx))
174174

175175
def _t_dot(self, idx: int) -> int:
176176
assert self[idx] == '.', "_t_dot should only be called if char is '.'"
@@ -222,77 +222,6 @@ def _t_ident_name(self, start: int) -> int:
222222
)
223223

224224

225-
class _IncrementalNumberParser(UsesSrc):
    """Scan a numeric literal out of the source text.

    Shape accepted by the methods below:

    * an optional run of digits, then optionally a ``.`` followed by an
      optional run of digits (at least one of the two runs must be present);
    * optionally an exponent: ``e``/``E``, an optional ``-`` and a mandatory
      run of digits (no ``.`` is allowed in the exponent).

    Within a run of digits a single ``_`` may separate two digits; a leading,
    trailing or doubled ``_`` is reported as an error.

    Errors are created with ``self.err(message, index)`` and raised.
    """
    default_err_type = LocatedMalformedNumberError

    @staticmethod
    def _is_digit(ch) -> bool:
        """Return True only if ``ch`` is exactly one ASCII decimal digit."""
        # ``ch in digits`` alone is a *substring* test: ``'' in digits`` is
        # True, so an empty string would be treated as a digit and the scan
        # loop below would never terminate.  Whether ``self.get`` can return
        # '' is not visible from this class, so guard against it explicitly.
        return len(ch) == 1 and ch in digits

    # todo 0x, 0b (I refuse to add octal literals) - also hex floats???
    def _parse_digit_seq(self, start: int) -> int | None:
        """Consume a run of digits (with single ``_`` separators) at ``start``.

        Returns the index just past the run, or ``None`` if the character at
        ``start`` is not a digit.  Raises ``self.err(...)`` for a leading,
        trailing or doubled ``_``.
        """
        first = self.get(start)
        if first == '_':
            raise self.err("Can't have '_' at the start of a number", start)
        if not self._is_digit(first):
            return None
        idx = start + 1
        while True:
            ch = self.get(idx)
            if ch == '_':
                # A separator is only valid when directly followed by a digit.
                following = self.get(idx + 1)
                if self._is_digit(following):
                    idx += 2  # '_' and digit
                elif following == '_':
                    raise self.err(
                        "Can only have one consecutive '_' in a number",
                        idx + 1)
                else:
                    raise self.err(
                        "Can't have '_' at the end of a number", idx)
            elif self._is_digit(ch):
                idx += 1
            else:
                return idx  # end of digits/'_'

    def _parse_num_no_exp(self, idx: int) -> int:
        """Consume ``digits``, ``digits.``, ``.digits`` or ``digits.digits``.

        Returns the index just past the consumed text.  Raises
        ``self.err(...)`` if the text starts with neither a digit nor ``.``,
        or if it is a lone ``.`` with no digits on either side.
        """
        int_end = self._parse_digit_seq(idx)
        has_pre_dot = int_end is not None
        if has_pre_dot:
            idx = int_end
        elif self.get(idx) != '.':
            raise self.err("Number must start with digit or '.' ", idx)
        if self.get(idx) != '.':
            # eg: 1234, 567e-5, 8 +9-10
            return idx
        idx += 1  # step over the '.'
        frac_end = self._parse_digit_seq(idx)
        if frac_end is not None:
            return frac_end
        if has_pre_dot:
            # eg: "12." - digits before the dot are enough
            return idx
        raise self.err("Number cannot be a single '.' "
                       "(expected digits before or after)", idx)

    def _parse_number(self, idx: int) -> int:
        """Consume a full number, including an optional exponent part.

        Returns the index just past the number.  Raises ``self.err(...)``
        when ``e``/``E`` is not followed by (an optional ``-`` and) digits.
        """
        idx = self._parse_num_no_exp(idx)
        if self.get(idx).lower() != 'e':
            return idx
        idx += 1
        # need to handle '-' here explicitly as it is part of the number
        # so can't just be parsed as a separate operator
        # NOTE(review): an explicit '+' sign (eg: 1e+5) is not accepted and
        # ends up in the error below - confirm that this is intended.
        if self.get(idx) == '-':
            idx += 1
        exp_end = self._parse_digit_seq(idx)  # no dot after the 'e'
        if exp_end is None:
            # eg: 1.2eC, 8e-Q which is always an error
            raise self.err("Expected integer after <number>e", idx)
        return exp_end

    def parse(self, start: int) -> Token:
        """Return a ``NumberToken`` spanning the number that begins at ``start``."""
        return NumberToken(StrRegion(start, self._parse_number(start)))
294-
295-
296225
def print_tokens(src: str, tokens: list[Token], stream: IO[str] = None, do_ws=False):
297226
if stream is None:
298227
stream = sys.stdout

0 commit comments

Comments
 (0)