Skip to content

Commit 42738b3

Browse files
Commit
1 parent 790cdae commit 42738b3

File tree

1 file changed

+252
-9
lines changed

1 file changed

+252
-9
lines changed

Lib/idlelib/colorizer.py

Lines changed: 252 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,255 @@
22
import keyword
33
import re
44
import time
5+
import token as T
6+
import tokenize
7+
from collections import deque
8+
from io import StringIO
9+
from tokenize import TokenInfo as TI
10+
from typing import Iterable, Iterator, Match, NamedTuple, Self
511

612
from idlelib.config import idleConf
713
from idlelib.delegator import Delegator
814

915
DEBUG = False
1016

1117

18+
# Matches one complete ANSI CSI escape sequence: ESC '[' parameter bytes, final byte.
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# Matches a zero-width bracketed region delimited by \x01 ... \x02 markers.
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table that strips bare \x01/\x02 zero-width marker bytes.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
# Keywords after which the next NAME token is a definition (tagged DEFINITION).
IDENTIFIERS_AFTER = {"def", "class"}
# Constant-valued keywords; not referenced in this hunk — presumably kept for
# parity with pyrepl's colorizer. TODO confirm it is used elsewhere.
KEYWORD_CONSTANTS = {"True", "False", "None"}
# Public builtin names, used to tag BUILTIN.
# NOTE(review): relies on `import builtins` earlier in the file (the diff hunk
# starts at line 2, so line 1 is not visible) — confirm it is present.
BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')}
24+
25+
26+
class Span(NamedTuple):
27+
"""Span indexing that's inclusive on both ends."""
28+
29+
start: int
30+
end: int
31+
32+
@classmethod
33+
def from_re(cls, m: Match[str], group: int | str) -> Self:
34+
re_span = m.span(group)
35+
return cls(re_span[0], re_span[1] - 1)
36+
37+
@classmethod
38+
def from_token(cls, token: TI, line_len: list[int]) -> Self:
39+
end_offset = -1
40+
if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE}
41+
and token.string.endswith(("{", "}"))):
42+
# gh-134158: a visible trailing brace comes from a double brace in input
43+
end_offset += 1
44+
45+
return cls(
46+
line_len[token.start[0] - 1] + token.start[1],
47+
line_len[token.end[0] - 1] + token.end[1] + end_offset,
48+
)
49+
50+
51+
class ColorSpan(NamedTuple):
    """A flat character Span paired with the highlight tag to apply to it."""

    span: Span
    tag: str
54+
55+
56+
def prev_next_window[T](
57+
iterable: Iterable[T]
58+
) -> Iterator[tuple[T | None, ...]]:
59+
"""Generates three-tuples of (previous, current, next) items.
60+
61+
On the first iteration previous is None. On the last iteration next
62+
is None. In case of exception next is None and the exception is re-raised
63+
on a subsequent next() call.
64+
65+
Inspired by `sliding_window` from `itertools` recipes.
66+
"""
67+
68+
iterator = iter(iterable)
69+
window = deque((None, next(iterator)), maxlen=3)
70+
try:
71+
for x in iterator:
72+
window.append(x)
73+
yield tuple(window)
74+
except Exception:
75+
raise
76+
finally:
77+
window.append(None)
78+
yield tuple(window)
79+
80+
81+
# Hard keywords that may legally begin the subject expression of a `match`
# statement (e.g. `match not x:`, `match await y:`, `match lambda: z:`).
keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"}
# Hard keywords that may legally begin a `case` pattern (constants only).
keyword_first_sets_case = {"False", "None", "True"}
83+
84+
85+
def is_soft_keyword_used(*tokens: TI | None) -> bool:
    """Returns True if the current token is a keyword in this context.

    For the `*tokens` to match anything, they have to be a three-tuple of
    (previous, current, next).
    """
    #trace("is_soft_keyword_used{t}", t=tokens)
    match tokens:
        # `match` at the start of a statement, followed by a token that can
        # only begin a subject expression: a literal, an f/t-string start,
        # an opening bracket, unary `*`/`~`, or `...`.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
            TI(string="match"),
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
            | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...")
        ):
            return True
        # `match <name>`: if the following NAME is a hard keyword it must be
        # one that can start an expression (see keyword_first_sets_match);
        # any non-keyword NAME also makes `match` the soft keyword.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
            TI(string="match"),
            TI(T.NAME, string=s)
        ):
            if keyword.iskeyword(s):
                return s in keyword_first_sets_match
            return True
        # `case` at the start of a (possibly dedented) line, followed by a
        # token that can begin a pattern: literal, bracket, or unary `-`/`*`.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="case"),
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
            | TI(T.OP, string="(" | "*" | "-" | "[" | "{")
        ):
            return True
        # `case <name>`: only the constant keywords can begin a pattern
        # (see keyword_first_sets_case); otherwise any NAME qualifies.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="case"),
            TI(T.NAME, string=s)
        ):
            if keyword.iskeyword(s):
                return s in keyword_first_sets_case
            return True
        # The wildcard pattern line `case _:`.
        case (TI(string="case"), TI(string="_"), TI(string=":")):
            return True
        # `type X = ...` type-alias statement: `type` acts as a soft keyword
        # only when followed by a non-keyword NAME.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="type"),
            TI(T.NAME, string=s)
        ):
            return not keyword.iskeyword(s)
        case _:
            return False
133+
134+
135+
def recover_unterminated_string(
    exc: tokenize.TokenError,
    line_lengths: list[int],
    last_emitted: ColorSpan | None,
    buffer: str,
) -> Iterator[ColorSpan]:
    """Yield a STRING span covering an unterminated string literal.

    Called after tokenization failed with *exc*; highlights from the reported
    start of the offending literal through the end of the buffer.
    """
    error_message, location = exc.args
    if location is None:
        return

    line_no, column = location

    unterminated_prefixes = (
        "unterminated string literal",
        "unterminated f-string literal",
        "unterminated t-string literal",
        "EOF in multi-line string",
        "unterminated triple-quoted f-string literal",
        "unterminated triple-quoted t-string literal",
    )
    if not error_message.startswith(unterminated_prefixes):
        return

    first = line_lengths[line_no - 1] + column - 1
    last = line_lengths[-1] - 1

    # in case FSTRING_START was already emitted
    if last_emitted and first <= last_emitted.span.start:
        first = last_emitted.span.end + 1

    yield ColorSpan(Span(first, last), "STRING")
166+
167+
168+
def gen_colors_from_token_stream(
    token_generator: Iterator[TI],
    line_lengths: list[int],
) -> Iterator[ColorSpan]:
    """Yield ColorSpans for a token stream, inspected through a
    (previous, current, next) sliding window.

    `line_lengths` is the cumulative line-length table consumed by
    Span.from_token to flatten (row, column) positions into offsets.
    """
    token_window = prev_next_window(token_generator)

    # True immediately after a `def`/`class` keyword: the next NAME token
    # is the definition's name and gets the DEFINITION tag.
    is_def_name = False
    # Soft keywords are only treated as keywords at bracket depth zero.
    bracket_level = 0
    for prev_token, token, next_token in token_window:
        assert token is not None
        if token.start == token.end:
            # Zero-width tokens have nothing to color.
            continue

        match token.type:
            case (
                T.STRING
                | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END
                | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END
            ):
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "STRING")
            case T.COMMENT:
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "COMMENT")
            case T.NUMBER:
                # NOTE(review): numbers reuse the "STRING" tag — presumably
                # because IDLE defines no dedicated number tag; confirm.
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "STRING")
            case T.OP:
                # Operators are not colored; only track bracket depth for
                # the soft-keyword check below.
                if token.string in "([{":
                    bracket_level += 1
                elif token.string in ")]}":
                    bracket_level -= 1
                # span = Span.from_token(token, line_lengths)
                # yield ColorSpan(span, "op")
            case T.NAME:
                if is_def_name:
                    is_def_name = False
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "DEFINITION")
                elif keyword.iskeyword(token.string):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "KEYWORD")
                    if token.string in IDENTIFIERS_AFTER:
                        is_def_name = True
                elif (
                    keyword.issoftkeyword(token.string)
                    and bracket_level == 0
                    and is_soft_keyword_used(prev_token, token, next_token)
                ):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "KEYWORD")
                elif (
                    # A builtin name, unless it is an attribute access
                    # (preceded by a dot, e.g. `obj.list`).
                    token.string in BUILTINS
                    and not (prev_token and prev_token.exact_type == T.DOT)
                ):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "BUILTIN")
225+
226+
227+
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield ColorSpans describing which index spans of `buffer` to color.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.
    """
    source = StringIO(buffer)
    # Cumulative table: line_lengths[i] is the absolute offset of the first
    # character of line i+1 in the flattened buffer.
    line_lengths = [0]
    for line in source:
        line_lengths.append(line_lengths[-1] + len(line))
    source.seek(0)

    tokens = tokenize.generate_tokens(source.readline)
    last_emitted: ColorSpan | None = None
    try:
        for color_span in gen_colors_from_token_stream(tokens, line_lengths):
            yield color_span
            last_emitted = color_span
    except SyntaxError:
        # Untokenizable input: keep whatever was already emitted.
        return
    except tokenize.TokenError as te:
        # Incomplete input (e.g. an open string literal): color the tail.
        yield from recover_unterminated_string(
            te, line_lengths, last_emitted, buffer
        )
252+
253+
12254
def any(name, alternates):
    "Return a named group pattern matching list of alternates."
    body = "|".join(alternates)
    return f"(?P<{name}>{body})"
@@ -333,21 +575,22 @@ def _add_tag(self, start, end, head, matched_group_name):
333575
f"{head}+{end:d}c")
334576

335577
def _add_tags_in_section(self, chars, head):
    """Parse and add highlighting tags using pyrepl's tokenization.

    `chars` is a string with the text to parse and to which
    highlighting is to be applied.

    `head` is the index in the text widget where the text is found.
    """
    # Stream spans straight from the generator; materializing them in a
    # list first buys nothing and delays the first tag.
    for span, tag in gen_colors(chars):
        # pyrepl spans are inclusive on both ends; _add_tag (like Tk text
        # indices) expects an exclusive end, hence the +1.
        self._add_tag(span.start, span.end + 1, head, tag)
351594

352595
def removecolors(self):
353596
"Remove all colorizing tags."

0 commit comments

Comments
 (0)