| 
2 | 2 | import keyword  | 
3 | 3 | import re  | 
4 | 4 | import time  | 
 | 5 | +import token as T  | 
 | 6 | +import tokenize  | 
 | 7 | +from collections import deque  | 
 | 8 | +from io import StringIO  | 
 | 9 | +from tokenize import TokenInfo as TI  | 
 | 10 | +from typing import Iterable, Iterator, Match, NamedTuple, Self  | 
5 | 11 | 
 
  | 
6 | 12 | from idlelib.config import idleConf  | 
7 | 13 | from idlelib.delegator import Delegator  | 
8 | 14 | 
 
  | 
9 | 15 | DEBUG = False  | 
10 | 16 | 
 
  | 
11 | 17 | 
 
  | 
 | 18 | +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")  | 
 | 19 | +ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")  | 
 | 20 | +ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})  | 
 | 21 | +IDENTIFIERS_AFTER = {"def", "class"}  | 
 | 22 | +KEYWORD_CONSTANTS = {"True", "False", "None"}  | 
 | 23 | +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')}  | 
 | 24 | + | 
 | 25 | + | 
 | 26 | +class Span(NamedTuple):  | 
 | 27 | +    """Span indexing that's inclusive on both ends."""  | 
 | 28 | + | 
 | 29 | +    start: int  | 
 | 30 | +    end: int  | 
 | 31 | + | 
 | 32 | +    @classmethod  | 
 | 33 | +    def from_re(cls, m: Match[str], group: int | str) -> Self:  | 
 | 34 | +        re_span = m.span(group)  | 
 | 35 | +        return cls(re_span[0], re_span[1] - 1)  | 
 | 36 | + | 
 | 37 | +    @classmethod  | 
 | 38 | +    def from_token(cls, token: TI, line_len: list[int]) -> Self:  | 
 | 39 | +        end_offset = -1  | 
 | 40 | +        if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE}  | 
 | 41 | +            and token.string.endswith(("{", "}"))):  | 
 | 42 | +            # gh-134158: a visible trailing brace comes from a double brace in input  | 
 | 43 | +            end_offset += 1  | 
 | 44 | + | 
 | 45 | +        return cls(  | 
 | 46 | +            line_len[token.start[0] - 1] + token.start[1],  | 
 | 47 | +            line_len[token.end[0] - 1] + token.end[1] + end_offset,  | 
 | 48 | +        )  | 
 | 49 | + | 
 | 50 | + | 
class ColorSpan(NamedTuple):
    """A flat-buffer Span paired with the highlight tag name to apply to it.

    Tags observed in this file: "STRING", "COMMENT", "KEYWORD",
    "DEFINITION", "BUILTIN".
    """

    span: Span
    tag: str
 | 54 | + | 
 | 55 | + | 
 | 56 | +def prev_next_window[T](  | 
 | 57 | +    iterable: Iterable[T]  | 
 | 58 | +) -> Iterator[tuple[T | None, ...]]:  | 
 | 59 | +    """Generates three-tuples of (previous, current, next) items.  | 
 | 60 | +
  | 
 | 61 | +    On the first iteration previous is None. On the last iteration next  | 
 | 62 | +    is None. In case of exception next is None and the exception is re-raised  | 
 | 63 | +    on a subsequent next() call.  | 
 | 64 | +
  | 
 | 65 | +    Inspired by `sliding_window` from `itertools` recipes.  | 
 | 66 | +    """  | 
 | 67 | + | 
 | 68 | +    iterator = iter(iterable)  | 
 | 69 | +    window = deque((None, next(iterator)), maxlen=3)  | 
 | 70 | +    try:  | 
 | 71 | +        for x in iterator:  | 
 | 72 | +            window.append(x)  | 
 | 73 | +            yield tuple(window)  | 
 | 74 | +    except Exception:  | 
 | 75 | +        raise  | 
 | 76 | +    finally:  | 
 | 77 | +        window.append(None)  | 
 | 78 | +        yield tuple(window)  | 
 | 79 | + | 
 | 80 | + | 
# Grammar "first sets": the only hard keywords that may legitimately begin
# the expression following a soft keyword.  After `match`, a subject
# expression may start with these; after `case`, only the constant patterns.
keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"}
keyword_first_sets_case = {"False", "None", "True"}
 | 83 | + | 
 | 84 | + | 
def is_soft_keyword_used(*tokens: TI | None) -> bool:
    """Returns True if the current token is a keyword in this context.

    For the `*tokens` to match anything, they have to be a three-tuple of
    (previous, current, next).  The previous token decides whether the
    soft keyword sits at the start of a statement; the next token decides
    whether what follows can begin a subject expression, pattern, or
    type-alias name.
    """
    #trace("is_soft_keyword_used{t}", t=tokens)
    match tokens:
        # `match` at statement start, followed by a literal or an opening
        # token that can begin a subject expression.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
            TI(string="match"),
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
            | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...")
        ):
            return True
        # `match` followed by a NAME: a hard keyword only counts when it can
        # begin an expression (see keyword_first_sets_match).
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
            TI(string="match"),
            TI(T.NAME, string=s)
        ):
            if keyword.iskeyword(s):
                return s in keyword_first_sets_match
            return True
        # `case` at the start of a line, followed by a token that can begin
        # a pattern (unary minus is allowed here, unlike after `match`).
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="case"),
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
            | TI(T.OP, string="(" | "*" | "-" | "[" | "{")
        ):
            return True
        # `case` followed by a NAME: among hard keywords only the constant
        # patterns True/False/None are valid (see keyword_first_sets_case).
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="case"),
            TI(T.NAME, string=s)
        ):
            if keyword.iskeyword(s):
                return s in keyword_first_sets_case
            return True
        # The wildcard pattern `case _:`.
        case (TI(string="case"), TI(string="_"), TI(string=":")):
            return True
        # `type X = ...` alias statements: `type` followed by a non-keyword NAME.
        case (
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"),
            TI(string="type"),
            TI(T.NAME, string=s)
        ):
            return not keyword.iskeyword(s)
        case _:
            return False
 | 133 | + | 
 | 134 | + | 
 | 135 | +def recover_unterminated_string(  | 
 | 136 | +    exc: tokenize.TokenError,  | 
 | 137 | +    line_lengths: list[int],  | 
 | 138 | +    last_emitted: ColorSpan | None,  | 
 | 139 | +    buffer: str,  | 
 | 140 | +) -> Iterator[ColorSpan]:  | 
 | 141 | +    msg, loc = exc.args  | 
 | 142 | +    if loc is None:  | 
 | 143 | +        return  | 
 | 144 | + | 
 | 145 | +    line_no, column = loc  | 
 | 146 | + | 
 | 147 | +    if msg.startswith(  | 
 | 148 | +        (  | 
 | 149 | +            "unterminated string literal",  | 
 | 150 | +            "unterminated f-string literal",  | 
 | 151 | +            "unterminated t-string literal",  | 
 | 152 | +            "EOF in multi-line string",  | 
 | 153 | +            "unterminated triple-quoted f-string literal",  | 
 | 154 | +            "unterminated triple-quoted t-string literal",  | 
 | 155 | +        )  | 
 | 156 | +    ):  | 
 | 157 | +        start = line_lengths[line_no - 1] + column - 1  | 
 | 158 | +        end = line_lengths[-1] - 1  | 
 | 159 | + | 
 | 160 | +        # in case FSTRING_START was already emitted  | 
 | 161 | +        if last_emitted and start <= last_emitted.span.start:  | 
 | 162 | +            start = last_emitted.span.end + 1  | 
 | 163 | + | 
 | 164 | +        span = Span(start, end)  | 
 | 165 | +        yield ColorSpan(span, "STRING")  | 
 | 166 | + | 
 | 167 | + | 
def gen_colors_from_token_stream(
    token_generator: Iterator[TI],
    line_lengths: list[int],
) -> Iterator[ColorSpan]:
    """Yield ColorSpans for the tokens produced by `token_generator`.

    `line_lengths` holds cumulative line lengths so that a token's
    (row, column) position can be mapped to a flat-buffer offset by
    Span.from_token.
    """
    token_window = prev_next_window(token_generator)

    # Set right after a `def`/`class` keyword: the next NAME is a definition.
    is_def_name = False
    # Depth of currently open (/[/{ brackets; soft keywords are only
    # highlighted at statement level (depth 0).
    bracket_level = 0
    for prev_token, token, next_token in token_window:
        assert token is not None
        if token.start == token.end:
            # Zero-width token: nothing to colorize.
            continue

        match token.type:
            case (
                T.STRING
                | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END
                | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END
            ):
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "STRING")
            case T.COMMENT:
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "COMMENT")
            case T.NUMBER:
                # NOTE(review): numbers reuse the "STRING" tag -- presumably
                # because IDLE has no dedicated number tag; confirm intended.
                span = Span.from_token(token, line_lengths)
                yield ColorSpan(span, "STRING")
            case T.OP:
                if token.string in "([{":
                    bracket_level += 1
                elif token.string in ")]}":
                    bracket_level -= 1
                # Operators themselves are deliberately not colorized.
                # span = Span.from_token(token, line_lengths)
                # yield ColorSpan(span, "op")
            case T.NAME:
                if is_def_name:
                    is_def_name = False
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "DEFINITION")
                elif keyword.iskeyword(token.string):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "KEYWORD")
                    if token.string in IDENTIFIERS_AFTER:
                        is_def_name = True
                elif (
                    keyword.issoftkeyword(token.string)
                    and bracket_level == 0
                    and is_soft_keyword_used(prev_token, token, next_token)
                ):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "KEYWORD")
                elif (
                    token.string in BUILTINS
                    # A builtin name after a dot is an attribute, not a builtin.
                    and not (prev_token and prev_token.exact_type == T.DOT)
                ):
                    span = Span.from_token(token, line_lengths)
                    yield ColorSpan(span, "BUILTIN")
 | 225 | + | 
 | 226 | + | 
 | 227 | +def gen_colors(buffer: str) -> Iterator[ColorSpan]:  | 
 | 228 | +    """Returns a list of index spans to color using the given color tag.  | 
 | 229 | +
  | 
 | 230 | +    The input `buffer` should be a valid start of a Python code block, i.e.  | 
 | 231 | +    it cannot be a block starting in the middle of a multiline string.  | 
 | 232 | +    """  | 
 | 233 | +    sio = StringIO(buffer)  | 
 | 234 | +    line_lengths = [0] + [len(line) for line in sio.readlines()]  | 
 | 235 | +    # make line_lengths cumulative  | 
 | 236 | +    for i in range(1, len(line_lengths)):  | 
 | 237 | +        line_lengths[i] += line_lengths[i-1]  | 
 | 238 | + | 
 | 239 | +    sio.seek(0)  | 
 | 240 | +    gen = tokenize.generate_tokens(sio.readline)  | 
 | 241 | +    last_emitted: ColorSpan | None = None  | 
 | 242 | +    try:  | 
 | 243 | +        for color in gen_colors_from_token_stream(gen, line_lengths):  | 
 | 244 | +            yield color  | 
 | 245 | +            last_emitted = color  | 
 | 246 | +    except SyntaxError:  | 
 | 247 | +        return  | 
 | 248 | +    except tokenize.TokenError as te:  | 
 | 249 | +        yield from recover_unterminated_string(  | 
 | 250 | +            te, line_lengths, last_emitted, buffer  | 
 | 251 | +        )  | 
 | 252 | + | 
 | 253 | + | 
def any(name, alternates):
    """Return a named group pattern matching list of alternates.

    NOTE: shadows the builtin `any`; kept for compatibility with existing
    callers of this module.
    """
    body = "|".join(alternates)
    return "(?P<%s>" % name + body + ")"
@@ -333,21 +575,22 @@ def _add_tag(self, start, end, head, matched_group_name):  | 
333 | 575 |                      f"{head}+{end:d}c")  | 
334 | 576 | 
 
  | 
335 | 577 |     def _add_tags_in_section(self, chars, head):  | 
336 |  | -        """Parse and add highlighting tags to a given part of the text.  | 
 | 578 | +        """Parse and add highlighting tags using pyrepl's tokenization.  | 
337 | 579 | 
  | 
338 | 580 |         `chars` is a string with the text to parse and to which  | 
339 | 581 |         highlighting is to be applied.  | 
340 | 582 | 
  | 
341 | 583 |             `head` is the index in the text widget where the text is found.  | 
342 | 584 |         """  | 
343 |  | -        for m in self.prog.finditer(chars):  | 
344 |  | -            for name, matched_text in matched_named_groups(m):  | 
345 |  | -                a, b = m.span(name)  | 
346 |  | -                self._add_tag(a, b, head, name)  | 
347 |  | -                if matched_text in ("def", "class"):  | 
348 |  | -                    if m1 := self.idprog.match(chars, b):  | 
349 |  | -                        a, b = m1.span(1)  | 
350 |  | -                        self._add_tag(a, b, head, "DEFINITION")  | 
 | 585 | +        # Use pyrepl's gen_colors to get color spans  | 
 | 586 | +        color_spans = list(gen_colors(chars))  | 
 | 587 | + | 
 | 588 | +        # Convert pyrepl spans to IDLE text widget positions and add tags  | 
 | 589 | +        for color_span in color_spans:  | 
 | 590 | +            start_pos = color_span.span.start  | 
 | 591 | +            end_pos = color_span.span.end + 1  # pyrepl spans are inclusive, tkinter expects exclusive end  | 
 | 592 | +            tag = color_span.tag  | 
 | 593 | +            self._add_tag(start_pos, end_pos, head, tag)  | 
351 | 594 | 
 
  | 
352 | 595 |     def removecolors(self):  | 
353 | 596 |         "Remove all colorizing tags."  | 
 | 
0 commit comments