Commit cecf82f

refactor(tokenizer): Extract token printing into own file.
This structure is getting rather Java-y, isn't it...
1 parent e0d9022 · commit cecf82f

4 files changed (+48, -43 lines)


parser/lexer/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,4 +1,5 @@
-from .tokenizer import Tokenizer, print_tokens, format_tokens
+from .tokenizer import Tokenizer
+from .token_print import print_tokens, format_tokens
 from .errors import (
     TokenizerError, LocatedTokenizerError, MalformedNumberError,
     LocatedMalformedNumberError)
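
Because both helpers are still re-exported from the package __init__, existing call sites are unaffected; only the defining module moved. A minimal sketch (not part of this commit):

    from parser.lexer import Tokenizer, print_tokens     # still resolves via the re-export
    from parser.lexer.token_print import format_tokens   # the new module can also be imported directly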

parser/lexer/number_parser.py

Lines changed: 4 additions & 4 deletions
@@ -2,10 +2,10 @@
 
 from string import digits
 
-from parser.common import StrRegion
-from parser.lexer import LocatedMalformedNumberError
-from parser.lexer.src_handler import UsesSrc
-from parser.lexer.tokens import Token, NumberToken
+from .errors import LocatedMalformedNumberError
+from .src_handler import UsesSrc
+from .tokens import Token, NumberToken
+from ..common import StrRegion
 
 
 class NumberParser(UsesSrc):

parser/lexer/token_print.py

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+import sys
+from io import StringIO
+from typing import IO
+
+from .tokens import Token
+
+
+def print_tokens(src: str, tokens: list[Token], stream: IO[str] = None, do_ws=False):
+    if stream is None:
+        stream = sys.stdout
+    table = []
+    for tok in tokens:
+        if tok.is_whitespace:
+            if do_ws:
+                table.append(['(WS) ' + repr(tok.region.resolve(src)), tok.name])
+        else:
+            table.append([str(tok.region.resolve(src)), tok.name])
+    max0 = max(len(r[0]) for r in table)
+    max1 = max(len(r[1]) for r in table)
+    for s0, s1 in table:
+        print(f'{s0:>{max0}} | {s1:>{max1}}', file=stream)
+
+
+def format_tokens(src: str, tokens: list[Token], do_ws=False):
+    out = StringIO()
+    print_tokens(src, tokens, out, do_ws)
+    return out.getvalue()
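
A usage sketch of the two helpers (not part of this commit). It assumes Tokenizer exposes a tokenize() method that returns the token list; the actual entry point lives in tokenizer.py and is not shown in this diff:

    from parser.lexer import Tokenizer, print_tokens, format_tokens

    src = 'x = 1 + 2'
    tokens = Tokenizer(src).tokenize()   # hypothetical entry point, see tokenizer.py

    print_tokens(src, tokens)                       # aligned 'text | token name' table on stdout
    dump = format_tokens(src, tokens, do_ws=True)   # same table as a str, including whitespace tokens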

parser/lexer/tokenizer.py

Lines changed: 13 additions & 38 deletions
@@ -1,9 +1,6 @@
 from __future__ import annotations
 
-import sys
-from io import StringIO
 from string import ascii_letters, digits
-from typing import IO
 
 from .number_parser import NumberParser
 from .src_handler import UsesSrc
@@ -16,6 +13,19 @@
 IDENT_CONT = IDENT_START + digits
 
 
+GETATTR_VALID_AFTER_CLS = (
+    StringToken,
+    RParToken,
+    RSqBracket,
+    AttrNameToken,
+    IdentNameToken
+    # Not valid (directly) after floats (need parens) because we treat all
+    # numbers the same and we cannot have it after ints
+    # 2.3 => (2).3 (attribute) or `2.3` (float)
+    # Also it would be confusing to have 2.e3 => num, 2.e3.3 -> num.attr.
+)
+
+
 class Tokenizer(UsesSrc):
     def __init__(self, src: str):
         super().__init__(src)
@@ -207,38 +217,3 @@ def _t_ident_name(self, start: int) -> int:
         while self.get(idx) in IDENT_CONT:
             idx += 1
         return self.add_token(IdentNameToken(StrRegion(start, idx)))
-
-
-GETATTR_VALID_AFTER_CLS = (
-    StringToken,
-    RParToken,
-    RSqBracket,
-    AttrNameToken,
-    IdentNameToken
-    # Not valid (directly) after floats (need parens) because we treat all
-    # numbers the same and we cannot have it after ints
-    # 2.3 => (2).3 (attribute) or `2.3` (float)
-    # Also it would be confusing to have 2.e3 => num, 2.e3.3 -> num.attr.
-)
-
-
-def print_tokens(src: str, tokens: list[Token], stream: IO[str] = None, do_ws=False):
-    if stream is None:
-        stream = sys.stdout
-    table = []
-    for tok in tokens:
-        if tok.is_whitespace:
-            if do_ws:
-                table.append(['(WS) ' + repr(tok.region.resolve(src)), tok.name])
-        else:
-            table.append([str(tok.region.resolve(src)), tok.name])
-    max0 = max(len(r[0]) for r in table)
-    max1 = max(len(r[1]) for r in table)
-    for s0, s1 in table:
-        print(f'{s0:>{max0}} | {s1:>{max1}}', file=stream)
-
-
-def format_tokens(src: str, tokens: list[Token], do_ws=False):
-    out = StringIO()
-    print_tokens(src, tokens, out, do_ws)
-    return out.getvalue()
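
This commit only moves GETATTR_VALID_AFTER_CLS above the Tokenizer class; the code that consumes it is outside this diff. The name and comments suggest it gates whether a '.' can begin attribute access, roughly along these lines (a sketch with a hypothetical prev_token parameter, not the actual implementation):

    def dot_starts_getattr(prev_token: Token) -> bool:
        # '.' begins attribute access only after strings, ')', ']', attribute
        # names or identifiers, never directly after a number literal.
        return isinstance(prev_token, GETATTR_VALID_AFTER_CLS)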
