55from collections import defaultdict
66from functools import lru_cache
77from typing import TYPE_CHECKING , List , Tuple , Optional , Union
8- from wcwidth import wcswidth , wcwidth
8+ from wcwidth import wcswidth , iter_graphemes , wrap as wcwidth_wrap
99from asciimatics .screen import Screen
1010if TYPE_CHECKING :
1111 from asciimatics .widgets .widget import Widget
@@ -123,7 +123,8 @@ def _enforce_width(text: Union[str, ColouredText],
123123 :param split_on_words: Whether to respect word boundaries when splitting.
124124 :return: The resulting truncated text
125125 """
126- return _enforce_width_ext (text , width , unicode_aware = unicode_aware , split_on_words = split_on_words )[0 ]
126+ return _enforce_width_ext (
127+ text , width , unicode_aware = unicode_aware , split_on_words = split_on_words )[0 ]
127128
128129
129130def _enforce_width_ext (text : Union [str , ColouredText ],
@@ -132,7 +133,7 @@ def _enforce_width_ext(text: Union[str, ColouredText],
132133 split_on_words : bool = False ) -> Tuple [Union [str , ColouredText ], bool ]:
133134 """
134135 Enforce a displayed piece of text to be a certain number of cells wide. This takes into
135- account double-width characters used in CJK languages.
136+ account double-width characters used in CJK languages and grapheme clusters .
136137
137138 :param text: The text to be truncated
138139 :param width: The screen cell width to enforce
@@ -148,20 +149,24 @@ def _enforce_width_ext(text: Union[str, ColouredText],
148149 # Can still optimize performance if we are not handling unicode characters.
149150 if unicode_aware or split_on_words :
150151 size = 0
151- last_space = 9999999999
152- for i , char in enumerate (str (text )):
153- c_width = wcwidth (char ) if ord (char ) >= 256 else 1
154- if split_on_words and char in (" " , "\t " ):
155- last_space = i + 1
156- if size + c_width > width :
157- return text [0 :min (i , last_space )], True
158- size += c_width
152+ pos = 0
153+ last_space_pos = 9999999999
154+ text_str = str (text )
155+ for grapheme in iter_graphemes (text_str ):
156+ g_width = wcswidth (grapheme )
157+ if split_on_words and grapheme in (" " , "\t " ):
158+ last_space_pos = pos + len (grapheme )
159+ if size + g_width > width :
160+ return text [0 :min (pos , last_space_pos )], True
161+ size += g_width
162+ pos += len (grapheme )
159163 elif len (text ) + 1 > width :
160164 return text [0 :width ], True
161165 return text , False
162166
163167
164- def _find_min_start (text : str , max_width : int , unicode_aware : bool = True , at_end : bool = False ) -> int :
168+ def _find_min_start (text : str , max_width : int , unicode_aware : bool = True ,
169+ at_end : bool = False ) -> int :
165170 """
166171 Find the starting point in the string that will reduce it to be less than or equal to the
167172 specified width when displayed on screen.
@@ -176,24 +181,29 @@ def _find_min_start(text: str, max_width: int, unicode_aware: bool = True, at_en
176181 if 2 * len (text ) < max_width :
177182 return 0
178183
179- # OK - do it the hard way...
184+ # OK - do it the hard way, iterating by grapheme cluster to avoid splitting them ...
180185 result = 0
181- string_len = wcswidth if unicode_aware else len
182- char_len = wcwidth if unicode_aware else lambda x : 1
183- display_end = string_len (text )
184- while display_end > max_width :
185- result += 1
186- display_end -= char_len (text [0 ])
187- text = text [1 :]
186+ if unicode_aware :
187+ display_end = wcswidth (text )
188+ for grapheme in iter_graphemes (text ):
189+ if display_end <= max_width :
190+ break
191+ display_end -= wcswidth (grapheme )
192+ result += len (grapheme )
193+ else :
194+ display_end = len (text )
195+ while display_end > max_width :
196+ result += 1
197+ display_end -= 1
188198 if at_end and display_end == max_width :
189- result += 1
199+ result += len ( next ( iter_graphemes ( text [ result :]), "" )) if unicode_aware else 1
190200 return result
191201
192202
193203def _get_offset (text : str , visible_width : int , unicode_aware : bool = True ) -> int :
194204 """
195205 Find the character offset within some text for a given visible offset (taking into account the
196- fact that some character glyphs are double width).
196- fact that some character glyphs are double width).
206+ fact that some glyphs are double width and that a grapheme cluster can span several characters).
197207
198208 :param text: The text to analyze
199209 :param visible_width: The required location within that text (as seen on screen).
@@ -202,13 +212,12 @@ def _get_offset(text: str, visible_width: int, unicode_aware: bool = True) -> in
202212 result = 0
203213 width = 0
204214 if unicode_aware :
205- for char in text :
206- if visible_width - width <= 0 :
215+ for grapheme in iter_graphemes (text ):
216+ g_width = wcswidth (grapheme )
217+ if width + g_width > visible_width :
207218 break
208- result += 1
209- width += wcwidth (char )
210- if visible_width - width < 0 :
211- result -= 1
219+ result += len (grapheme )
220+ width += g_width
212221 else :
213222 result = min (len (text ), visible_width )
214223 return result
@@ -227,42 +236,51 @@ def _split_text(text: str, width: int, height: int, unicode_aware: bool = True)
227236 :param height: The maximum height for the resulting text.
228237 :return: A list of strings of the broken up text.
229238 """
230- # At a high level, just try to split on whitespace for the best results.
231- tokens = text .split (" " )
232- result = []
233- current_line = ""
234239 string_len = wcswidth if unicode_aware else len
235- for token in tokens :
236- for i , line_token in enumerate (token .split ("\n " )):
237- if string_len (current_line + line_token ) > width or i > 0 :
238- # Don't bother inserting completely blank lines
239- # which should only happen on the very first
240- # line (as the rest will inject whitespace/newlines)
241- if len (current_line ) > 0 :
242- result .append (current_line .rstrip ())
243- current_line = line_token + " "
244- else :
245- current_line += line_token + " "
246240
247- # At this point we've either split nicely or have a hugely long unbroken string
248- # (e.g. because the language doesn't use whitespace.
249- # Either way, break this last line up as best we can.
250- current_line = current_line .rstrip ()
251- while string_len (current_line ) > 0 :
252- new_line = str (_enforce_width (current_line , width , unicode_aware ))
253- result .append (new_line )
254- current_line = current_line [len (new_line ):]
241+ if unicode_aware :
242+ # Use wcwidth.wrap() for word wrapping that is aware of graphemes, East Asian widths,
243+ # emoji and terminal sequences; it is modelled on the standard textwrap.wrap(). Split on
244+ # newlines first so explicit line breaks are preserved (as described at the bottom of the
245+ # wcwidth.wrap() API documentation): like repeated '<br>' in HTML, they are often wanted.
246+ result = []
247+ for paragraph in text .split ("\n " ):
248+ if paragraph :
249+ result .extend (wcwidth_wrap (paragraph , width , break_long_words = True ))
250+ else :
251+ result .append ("" )
252+ else :
253+ # Legacy non-unicode path
254+ tokens = text .split (" " )
255+ result = []
256+ current_line = ""
257+ for token in tokens :
258+ for i , line_token in enumerate (token .split ("\n " )):
259+ if len (current_line + line_token ) > width or i > 0 :
260+ if len (current_line ) > 0 :
261+ result .append (current_line .rstrip ())
262+ current_line = line_token + " "
263+ else :
264+ current_line += line_token + " "
265+ current_line = current_line .rstrip ()
266+ while len (current_line ) > 0 :
267+ new_line = current_line [:width ]
268+ result .append (new_line )
269+ current_line = current_line [len (new_line ):]
255270
256- # Check for a height overrun and truncate.
271+ # Check for a height overrun and truncate with ellipsis .
257272 if len (result ) > height :
258273 result = result [:height ]
259- result [height - 1 ] = result [height - 1 ][:width - 3 ] + "..."
274+ last_line = result [height - 1 ]
275+ truncated = _enforce_width (last_line , width - 3 , unicode_aware )
276+ result [height - 1 ] = str (truncated ) + "..."
260277
261278 # Very small columns could be shorter than individual words - truncate
262279 # each line if necessary.
263280 for i , line in enumerate (result ):
264- if len (line ) > width :
265- result [i ] = line [:width - 3 ] + "..."
281+ if string_len (line ) > width :
282+ truncated = _enforce_width (line , width - 3 , unicode_aware )
283+ result [i ] = str (truncated ) + "..."
266284 return result
267285
268286
0 commit comments