22import unicodedata
33import functools
44
5+ from idlelib import colorizer
6+ from typing import cast , Iterator , Literal , Match , NamedTuple , Pattern , Self
7+ from _colorize import ANSIColors
8+
59from .types import CharBuffer , CharWidths
610from .trace import trace
711
# Matches one escape sequence of the form ESC "[", then any run of characters
# in the range " " through "@", then a single final character in "A" - "~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# Matches (non-greedily) a \x01 ... \x02 bracketed run, markers included.
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table that deletes the bare \x01 / \x02 marker characters while
# keeping whatever sits between them.  Keys must be exactly one character long:
# str.maketrans() raises ValueError for longer string keys.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
# Compiled pattern taken from idlelib's colorizer.  gen_colors() scans the
# buffer with it, and every non-empty named group of a match becomes a span.
COLORIZE_RE: Pattern[str] = colorizer.prog
# Pattern tried immediately after a "def"/"class" keyword; its group 1 is the
# text colored as DEFINITION by gen_color_spans().
IDENTIFIER_RE: Pattern[str] = colorizer.idprog
# Matched texts after which gen_color_spans() looks for a definition name.
IDENTIFIERS_AFTER = {"def", "class"}
# Maps regex group names to color tags; a group name missing from this map is
# used as the tag unchanged.
COLORIZE_GROUP_NAME_MAP: dict[str, str] = colorizer.prog_group_name_to_tag
19+
20+ type ColorTag = (
21+ Literal ["KEYWORD" ]
22+ | Literal ["BUILTIN" ]
23+ | Literal ["COMMENT" ]
24+ | Literal ["STRING" ]
25+ | Literal ["DEFINITION" ]
26+ | Literal ["SYNC" ]
27+ )
28+
29+
30+ class Span (NamedTuple ):
31+ """Span indexing that's inclusive on both ends."""
32+
33+ start : int
34+ end : int
35+
36+ @classmethod
37+ def from_re (cls , m : Match [str ], group : int | str ) -> Self :
38+ re_span = m .span (group )
39+ return cls (re_span [0 ], re_span [1 ] - 1 )
40+
41+
class ColorSpan(NamedTuple):
    """A `Span` paired with the color tag that should be applied to it."""

    span: Span  # inclusive index range to color
    tag: ColorTag  # tag name; disp_str() looks it up in TAG_TO_ANSI
45+
46+
# ANSI escape code emitted for each color tag.  "SYNC" maps to the reset code;
# disp_str() appends TAG_TO_ANSI["SYNC"] wherever a colored span has to close.
TAG_TO_ANSI: dict[ColorTag, str] = {
    "KEYWORD": ANSIColors.BOLD_BLUE,
    "BUILTIN": ANSIColors.CYAN,
    "COMMENT": ANSIColors.RED,
    "STRING": ANSIColors.GREEN,
    "DEFINITION": ANSIColors.BOLD_WHITE,
    "SYNC": ANSIColors.RESET,
}
1155
1256
1357@functools .cache
@@ -41,25 +85,82 @@ def unbracket(s: str, including_content: bool = False) -> str:
4185 return s .translate (ZERO_WIDTH_TRANS )
4286
4387
44- def disp_str (buffer : str ) -> tuple [CharBuffer , CharWidths ]:
45- r"""Decompose the input buffer into a printable variant.
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield the color spans found in `buffer`, lazily and without overlaps.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.

    A span that starts at or before the end of the previously yielded span is
    dropped.  `disp_str()` only ever inspects the first pending span and
    discards it when the current index equals its `end`; an overlapping span
    whose `end` has already been passed would therefore never be discarded
    during that call and would hide every later span until the next call
    prunes it.
    """
    last_end = -1  # inclusive end index of the most recently yielded span
    for match in COLORIZE_RE.finditer(buffer):
        for color in gen_color_spans(match):
            if color.span.start <= last_end:
                # NOTE(review): presumably this happens when the name after
                # "def"/"class" is also matched by COLORIZE_RE on its own
                # (e.g. a builtin name) -- confirm against idlelib's pattern.
                continue
            last_end = color.span.end
            yield color
96+
97+
def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]:
    """Yield one color span per non-empty named group of `re_match`.

    When the text matched by a group is one of IDENTIFIERS_AFTER, the
    identifier found right after it (if any) is additionally yielded as a
    "DEFINITION" span.
    """
    source = re_match.string
    for group_name, text in re_match.groupdict().items():
        if text:
            group_span = Span.from_re(re_match, group_name)
            # Group names without an entry in the map are used as the tag as-is.
            color_tag = COLORIZE_GROUP_NAME_MAP.get(group_name, group_name)
            yield ColorSpan(group_span, cast(ColorTag, color_tag))
            if text not in IDENTIFIERS_AFTER:
                continue
            # Look for the defined name starting just past the keyword.
            name_match = IDENTIFIER_RE.match(source, group_span.end + 1)
            if name_match:
                yield ColorSpan(Span.from_re(name_match, 1), "DEFINITION")
110+
111+
112+ def disp_str (
113+ buffer : str , colors : list [ColorSpan ] | None = None , start_index : int = 0
114+ ) -> tuple [CharBuffer , CharWidths ]:
115+ r"""Decompose the input buffer into a printable variant with applied colors.
46116
47117 Returns a tuple of two lists:
48- - the first list is the input buffer, character by character;
118+ - the first list is the input buffer, character by character, with color
119+ escape codes added (while those codes contain multiple ASCII characters,
120+ each code is considered atomic *and is attached for the corresponding
121+ visible character*);
49122 - the second list is the visible width of each character in the input
50123 buffer.
51124
125+ Note on colors:
126+ - The `colors` list, if provided, is partially consumed within. We're using
127+ a list and not a generator since we need to hold onto the current
128+ unfinished span between calls to disp_str in case of multiline strings.
129+ - The `colors` list is computed from the start of the input block. `buffer`
130+ is only a subset of that input block, a single line within. This is why
131+ we need `start_index` to inform us which position is the start of `buffer`
132+ actually within user input. This allows us to match color spans correctly.
133+
52134 Examples:
53135 >>> utils.disp_str("a = 9")
54136 (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])
137+
138+ >>> line = "while 1:"
139+ >>> colors = list(utils.gen_colors(line))
140+ >>> utils.disp_str(line, colors=colors)
141+ (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1])
142+
55143 """
56144 chars : CharBuffer = []
57145 char_widths : CharWidths = []
58146
59147 if not buffer :
60148 return chars , char_widths
61149
62- for c in buffer :
150+ while colors and colors [0 ].span .end < start_index :
151+ # move past irrelevant spans
152+ colors .pop (0 )
153+
154+ pre_color = ""
155+ post_color = ""
156+ if colors and colors [0 ].span .start < start_index :
157+ # looks like we're continuing a previous color (e.g. a multiline str)
158+ pre_color = TAG_TO_ANSI [colors [0 ].tag ]
159+
160+ for i , c in enumerate (buffer , start_index ):
161+ if colors and colors [0 ].span .start == i : # new color starts now
162+ pre_color = TAG_TO_ANSI [colors [0 ].tag ]
163+
63164 if c == "\x1a " : # CTRL-Z on Windows
64165 chars .append (c )
65166 char_widths .append (2 )
@@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
73174 else :
74175 chars .append (c )
75176 char_widths .append (str_width (c ))
177+
178+ if colors and colors [0 ].span .end == i : # current color ends now
179+ post_color = TAG_TO_ANSI ["SYNC" ]
180+ colors .pop (0 )
181+
182+ chars [- 1 ] = pre_color + chars [- 1 ] + post_color
183+ pre_color = ""
184+ post_color = ""
185+
186+ if colors and colors [0 ].span .start < i and colors [0 ].span .end > i :
187+ # even though the current color should be continued, reset it for now.
188+ # the next call to `disp_str()` will revive it.
189+ chars [- 1 ] += TAG_TO_ANSI ["SYNC" ]
190+
76191 trace ("disp_str({buffer}) = {s}, {b}" , buffer = repr (buffer ), s = chars , b = char_widths )
77192 return chars , char_widths
0 commit comments