22import  unicodedata 
33import  functools 
44
5+ from  idlelib  import  colorizer 
6+ from  typing  import  cast , Iterator , Literal , Match , NamedTuple , Pattern , Self 
7+ from  _colorize  import  ANSIColors 
8+ 
59from  .types  import  CharBuffer , CharWidths 
610from  .trace  import  trace 
711
# Matches one ANSI CSI escape sequence: ESC, "[", any number of characters
# in the range " ".."@", and a single final character in the range "A".."~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# Matches a region bracketed by \x01 ... \x02, markers included (non-greedy).
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table that deletes the bare \x01 / \x02 markers and keeps
# whatever text they enclose. str.maketrans() requires every string key to be
# exactly one character long, so the keys must be the marker characters alone.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
15+ COLORIZE_RE : Pattern [str ] =  colorizer .prog 
16+ IDENTIFIER_RE : Pattern [str ] =  colorizer .idprog 
17+ IDENTIFIERS_AFTER  =  {"def" , "class" }
18+ COLORIZE_GROUP_NAME_MAP : dict [str , str ] =  colorizer .prog_group_name_to_tag 
19+ 
20+ type ColorTag  =  (
21+     Literal ["KEYWORD" ]
22+     |  Literal ["BUILTIN" ]
23+     |  Literal ["COMMENT" ]
24+     |  Literal ["STRING" ]
25+     |  Literal ["DEFINITION" ]
26+     |  Literal ["SYNC" ]
27+ )
28+ 
29+ 
30+ class  Span (NamedTuple ):
31+     """Span indexing that's inclusive on both ends.""" 
32+ 
33+     start : int 
34+     end : int 
35+ 
36+     @classmethod  
37+     def  from_re (cls , m : Match [str ], group : int  |  str ) ->  Self :
38+         re_span  =  m .span (group )
39+         return  cls (re_span [0 ], re_span [1 ] -  1 )
40+ 
41+ 
class ColorSpan(NamedTuple):
    """A range of the input paired with the color tag to render it with."""

    # inclusive index range the tag applies to
    span: Span
    # key into TAG_TO_ANSI
    tag: ColorTag
45+ 
46+ 
# Escape code emitted in front of the first character of a span, per tag.
# The "SYNC" entry is the reset code that disp_str() appends when a span ends.
TAG_TO_ANSI: dict[ColorTag, str] = {
    "KEYWORD": ANSIColors.BOLD_BLUE,
    "BUILTIN": ANSIColors.CYAN,
    "COMMENT": ANSIColors.RED,
    "STRING": ANSIColors.GREEN,
    "DEFINITION": ANSIColors.BOLD_WHITE,
    "SYNC": ANSIColors.RESET,
}
1155
1256
1357@functools .cache  
@@ -41,25 +85,82 @@ def unbracket(s: str, including_content: bool = False) -> str:
4185    return  s .translate (ZERO_WIDTH_TRANS )
4286
4387
44- def  disp_str (buffer : str ) ->  tuple [CharBuffer , CharWidths ]:
45-     r"""Decompose the input buffer into a printable variant. 
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield the color spans found in `buffer`, in order and without overlaps.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.

    `disp_str()` consumes the spans strictly front to back and, while
    rendering a line, only drops the front span once it reaches the character
    at that span's `end` index. A span starting at or before the end of the
    span yielded before it would never be dropped during that line and would
    block every later span on it. Such a span can be produced because a
    "DEFINITION" span reaches past the regex match it was derived from --
    presumably whenever the name after `def`/`class` is itself matched by
    COLORIZE_RE (e.g. a builtin name). Those overlapping spans are skipped.
    """
    last_end = -1
    for found in COLORIZE_RE.finditer(buffer):
        for color in gen_color_spans(found):
            if color.span.start <= last_end:
                # overlaps the span yielded before it; the earlier one wins
                continue
            last_end = color.span.end
            yield color
96+ 
97+ 
def gen_color_spans(re_match: Match[str]) -> Iterator[ColorSpan]:
    """Yield a ColorSpan for each named group of `re_match` that captured text.

    Group names are translated through COLORIZE_GROUP_NAME_MAP; a name
    without an entry is used as the tag unchanged. When the captured text is
    one of IDENTIFIERS_AFTER, the identifier found right behind it is
    yielded as an additional "DEFINITION" span.
    """
    source = re_match.string
    for group_name, text in re_match.groupdict().items():
        if text:
            group_span = Span.from_re(re_match, group_name)
            color_tag = COLORIZE_GROUP_NAME_MAP.get(group_name, group_name)
            yield ColorSpan(group_span, cast(ColorTag, color_tag))
            if text not in IDENTIFIERS_AFTER:
                continue
            # the search starts on the first index past the inclusive span end
            name_match = IDENTIFIER_RE.match(source, group_span.end + 1)
            if name_match:
                yield ColorSpan(Span.from_re(name_match, 1), "DEFINITION")
110+ 
111+ 
112+ def  disp_str (
113+     buffer : str , colors : list [ColorSpan ] |  None  =  None , start_index : int  =  0 
114+ ) ->  tuple [CharBuffer , CharWidths ]:
115+     r"""Decompose the input buffer into a printable variant with applied colors. 
46116
47117    Returns a tuple of two lists: 
48-     - the first list is the input buffer, character by character; 
118+     - the first list is the input buffer, character by character, with color 
119+       escape codes added (while those codes contain multiple ASCII characters, 
120+       each code is considered atomic *and is attached for the corresponding 
121+       visible character*); 
49122    - the second list is the visible width of each character in the input 
50123      buffer. 
51124
125+     Note on colors: 
126+     - The `colors` list, if provided, is partially consumed within. We're using 
127+       a list and not a generator since we need to hold onto the current 
128+       unfinished span between calls to disp_str in case of multiline strings. 
129+     - The `colors` list is computed from the start of the input block. `buffer` 
130+       is only a subset of that input block, a single line within. This is why 
131+       we need `start_index` to inform us which position is the start of `buffer` 
132+       actually within user input. This allows us to match color spans correctly. 
133+ 
52134    Examples: 
53135    >>> utils.disp_str("a = 9") 
54136    (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) 
137+ 
138+     >>> line = "while 1:" 
139+     >>> colors = list(utils.gen_colors(line)) 
140+     >>> utils.disp_str(line, colors=colors) 
141+     (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) 
142+ 
55143    """ 
56144    chars : CharBuffer  =  []
57145    char_widths : CharWidths  =  []
58146
59147    if  not  buffer :
60148        return  chars , char_widths 
61149
62-     for  c  in  buffer :
150+     while  colors  and  colors [0 ].span .end  <  start_index :
151+         # move past irrelevant spans 
152+         colors .pop (0 )
153+ 
154+     pre_color  =  "" 
155+     post_color  =  "" 
156+     if  colors  and  colors [0 ].span .start  <  start_index :
157+         # looks like we're continuing a previous color (e.g. a multiline str) 
158+         pre_color  =  TAG_TO_ANSI [colors [0 ].tag ]
159+ 
160+     for  i , c  in  enumerate (buffer , start_index ):
161+         if  colors  and  colors [0 ].span .start  ==  i :  # new color starts now 
162+             pre_color  =  TAG_TO_ANSI [colors [0 ].tag ]
163+ 
63164        if  c  ==  "\x1a " :  # CTRL-Z on Windows 
64165            chars .append (c )
65166            char_widths .append (2 )
@@ -73,5 +174,19 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]:
73174        else :
74175            chars .append (c )
75176            char_widths .append (str_width (c ))
177+ 
178+         if  colors  and  colors [0 ].span .end  ==  i :  # current color ends now 
179+             post_color  =  TAG_TO_ANSI ["SYNC" ]
180+             colors .pop (0 )
181+ 
182+         chars [- 1 ] =  pre_color  +  chars [- 1 ] +  post_color 
183+         pre_color  =  "" 
184+         post_color  =  "" 
185+ 
186+     if  colors  and  colors [0 ].span .start  <  i  and  colors [0 ].span .end  >  i :
187+         # even though the current color should be continued, reset it for now. 
188+         # the next call to `disp_str()` will revive it. 
189+         chars [- 1 ] +=  TAG_TO_ANSI ["SYNC" ]
190+ 
76191    trace ("disp_str({buffer}) = {s}, {b}" , buffer = repr (buffer ), s = chars , b = char_widths )
77192    return  chars , char_widths 
0 commit comments