Skip to content

Commit 07b0fd5

Browse files
refactor(tokenizer): Extract parser.lexer.src_handler module
1 parent 93f7f42 commit 07b0fd5

File tree

2 files changed

+43
-37
lines changed

2 files changed

+43
-37
lines changed

parser/lexer/src_handler.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from __future__ import annotations
2+
3+
from typing import Sequence
4+
5+
from .errors import LocatedTokenizerError
6+
from .tokens import Token
7+
from ..common import StrRegion, BaseLocatedError, region_union
8+
9+
10+
class UsesSrc:
    """Mixin giving subclasses bounds-safe access to a source string.

    Provides indexing/slicing into ``self.src``, EOF checks, safe
    character lookup, and construction of located errors whose region
    is derived from indices, Tokens or StrRegions.
    """

    # Error class used by err() when no explicit ``tp`` is passed.
    default_err_type = LocatedTokenizerError

    def __init__(self, src: str):
        # The full source text being processed.
        self.src: str = src

    def __getitem__(self, item: int | slice) -> str:
        """Index or slice directly into the underlying source string."""
        return self.src[item]

    def eof(self, idx: int) -> bool:
        """Return True when ``idx`` is at or past the end of the source."""
        return idx >= len(self.src)

    def get(self, idx: int, eof: str = '\0') -> str:
        """Return the character at ``idx``, or ``eof`` if out of range.

        EAFP: indexing and catching IndexError avoids a separate bounds
        check, and negative indices behave as normal Python indexing.
        """
        try:
            return self.src[idx]
        except IndexError:
            return eof

    def err(self, msg: str,
            loc: int | Token | StrRegion | Sequence[int | Token | StrRegion],
            tp: type[BaseLocatedError] | None = None):
        """Build (not raise) a located error covering ``loc``.

        ``loc`` may be a single location (an index, a Token, or a
        StrRegion) or a sequence of such locations; the error's region
        is the union of all of them.  ``tp`` defaults to
        ``default_err_type`` when None.
        """
        try:
            # A single Token/StrRegion/int isn't iterable, so tuple()
            # raising TypeError distinguishes "one location" from "many".
            seq: tuple[int | Token | StrRegion, ...] = tuple(loc)
        except TypeError:
            seq = (loc,)
        region = region_union([
            # A bare index denotes the one-character region at that index.
            StrRegion(o, o + 1) if isinstance(o, int) else o
            for o in seq])
        tp = tp or self.default_err_type
        return tp(msg, region, self.src)

parser/lexer/tokenizer.py

Lines changed: 4 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,19 @@
33
import sys
44
from io import StringIO
55
from string import ascii_letters, digits
6-
from typing import IO, Sequence
6+
from typing import IO
77

8-
from .errors import LocatedTokenizerError, LocatedMalformedNumberError
8+
from .errors import LocatedMalformedNumberError
9+
from .src_handler import UsesSrc
910
from .tokens import *
10-
from .tokens import OpToken, CommaToken
11-
from ..common import StrRegion, region_union
12-
from ..common.error import BaseLocatedError
11+
from ..common import StrRegion
1312
from ..operators import OPS_SET, MAX_OP_LEN, OP_FIRST_CHARS
1413

1514

1615
IDENT_START = ascii_letters + '_'
1716
IDENT_CONT = IDENT_START + digits
1817

1918

20-
class UsesSrc:
21-
def __init__(self, src: str):
22-
self.src: str = src
23-
24-
def __getitem__(self, item: int | slice) -> str:
25-
return self.src[item]
26-
27-
def eof(self, idx: int):
28-
return idx >= len(self.src)
29-
30-
def get(self, idx: int, eof: str = '\0') -> str:
31-
try:
32-
return self.src[idx]
33-
except IndexError:
34-
return eof
35-
36-
default_err_type = LocatedTokenizerError
37-
38-
def err(self, msg: str,
39-
loc: int | Token | StrRegion | Sequence[int | Token | StrRegion],
40-
tp: type[BaseLocatedError] = None):
41-
try:
42-
seq: tuple[int | Token | StrRegion, ...] = tuple(loc)
43-
except TypeError:
44-
seq = (loc,)
45-
region = region_union([
46-
StrRegion(o, o + 1) if isinstance(o, int) else o
47-
for o in seq])
48-
tp = tp or self.default_err_type
49-
return tp(msg, region, self.src)
50-
51-
5219
class Tokenizer(UsesSrc):
5320
def __init__(self, src: str):
5421
super().__init__(src)

0 commit comments

Comments
 (0)