Skip to content

Commit a285b7e

Browse files
authored
Integrate with wcwidth 0.5.0 for proper Unicode handling (#166)
- Simplify break_line() to use wcwidth.wrap() for word-boundary breaking
- Add proper support for grapheme clusters (emoji, flags, ZWJ sequences)
- Handle wide characters (CJK) correctly
- Preserve ANSI sequences across line breaks
- Fix word-wrap padding bug in containers
- Require wcwidth>=0.5
- Add comprehensive tests for wcwidth integration
- Add interactive wcwidth demo example
1 parent 1b89770 commit a285b7e

File tree

6 files changed

+308
-84
lines changed

6 files changed

+308
-84
lines changed

examples/wcwidth_demo.py

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#!/usr/bin/env python3
2+
"""Run: python examples/wcwidth_demo.py
3+
4+
Interactive demo of break_line() wrapping ANSI sequences, wide (CJK) characters, and grapheme clusters.
5+
"""
6+
import pytermgui as ptg
7+
8+
CJK = "中文字符"
9+
FAMILY = "\U0001F468\u200D\U0001F469\u200D\U0001F467"
10+
FLAG = "\U0001F1E8\U0001F1E6" # Canada
11+
12+
# Additional grapheme examples from wcwidth tests
13+
WAVE_SKIN = "\U0001F44B\U0001F3FB" # Waving hand + skin tone modifier
14+
HEART_VS16 = "\u2764\uFE0F" # Heart + variation selector-16
15+
CAFE = "cafe\u0301" # café with combining acute accent
16+
WOMAN_TECH = "\U0001f469\U0001f3fb\u200d\U0001f4bb" # Woman technologist (ZWJ)
17+
KISS = "\U0001F9D1\U0001F3FB\u200d\u2764\uFE0F\u200d\U0001F48B\u200d\U0001F9D1\U0001F3FD" # Kiss
18+
19+
# ANSI escape codes for color testing
20+
RST = "\x1b[0m"
21+
BOLD = "\x1b[1m"
22+
ITALIC = "\x1b[3m"
23+
RED = "\x1b[31m"
24+
GREEN = "\x1b[32m"
25+
ORANGE_256 = "\x1b[38;5;208m"
26+
PURPLE_256 = "\x1b[38;5;129m"
27+
CYAN_RGB = "\x1b[38;2;0;255;255m"
28+
PINK_RGB = "\x1b[38;2;255;105;180m"
29+
30+
# Big word with 24-bit RGB gradient
31+
CHOOCHOO = (
32+
"\x1b[38;2;255;0;0mc"
33+
"\x1b[38;2;255;127;0mh"
34+
"\x1b[38;2;255;255;0mo"
35+
"\x1b[38;2;0;255;0mo"
36+
"\x1b[38;2;0;255;127mc"
37+
"\x1b[38;2;0;255;255mh"
38+
"\x1b[38;2;0;127;255mo"
39+
"\x1b[38;2;0;0;255mo"
40+
"\x1b[38;2;127;0;255mp"
41+
"\x1b[38;2;255;0;255mo"
42+
"\x1b[38;2;255;0;127mn"
43+
"\x1b[38;2;255;0;0my"
44+
"\x1b[38;2;255;127;0me"
45+
"\x1b[38;2;255;255;0mx"
46+
"\x1b[38;2;0;255;0mp"
47+
"\x1b[38;2;0;255;127mr"
48+
"\x1b[38;2;0;255;255me"
49+
"\x1b[38;2;0;127;255ms"
50+
"\x1b[38;2;0;0;255ms"
51+
"\x1b[0m"
52+
)
53+
54+
ptg.WindowManager.autorun = False
55+
56+
# Track adjustable containers and current width
57+
current_width = 19
58+
column_width = 25
59+
adjustable_containers = []
60+
column_containers = []
61+
main_window = None
62+
63+
64+
def adjust_width(delta):
65+
"""Adjust width of all tracked containers."""
66+
global current_width, column_width, main_window
67+
new_width = max(6, min(30, current_width + delta))
68+
if new_width != current_width:
69+
current_width = new_width
70+
column_width = column_width + delta
71+
for container in adjustable_containers:
72+
container.static_width = current_width
73+
for column in column_containers:
74+
column.static_width = column_width
75+
if main_window is not None:
76+
main_window.width = main_window.width + delta * 4
77+
width_label.value = f"[dim]Width: {current_width} | ←/→ adjust | Ctrl+C exit[/]"
78+
79+
80+
with ptg.WindowManager() as manager:
81+
manager.mouse_enabled = False
82+
83+
# Bind arrow keys for width adjustment
84+
manager.bind(ptg.keys.LEFT, lambda *_: adjust_width(-1), "Decrease width")
85+
manager.bind(ptg.keys.RIGHT, lambda *_: adjust_width(1), "Increase width")
86+
87+
# Create containers and track the adjustable ones
88+
sgr_box = ptg.Container(ptg.Label(f"{BOLD}bold{RST} {ITALIC}italic{RST}"),
89+
static_width=current_width)
90+
colors_box = ptg.Container(ptg.Label(f"{RED}red{RST} {GREEN}green{RST}"),
91+
static_width=current_width)
92+
c256_box = ptg.Container(ptg.Label(f"{ORANGE_256}orange{RST} {PURPLE_256}purple{RST}"),
93+
static_width=current_width)
94+
rgb_box = ptg.Container(ptg.Label(f"{CYAN_RGB}cyan{RST} {PINK_RGB}pink{RST}"),
95+
static_width=current_width)
96+
97+
wrap_box = ptg.Container(ptg.Label("The quick-brown-fox"), static_width=current_width)
98+
cjk_box = ptg.Container(ptg.Label(CJK + CJK), static_width=current_width)
99+
family_box = ptg.Container(ptg.Label(f"Hi{FAMILY}!"), static_width=current_width)
100+
flag_box = ptg.Container(ptg.Label(f"{FLAG}Canada"), static_width=current_width)
101+
102+
combine_box = ptg.Container(ptg.Label(f"A {CAFE}!"), static_width=current_width)
103+
skin_box = ptg.Container(ptg.Label(f"Hi{WAVE_SKIN}!"), static_width=current_width)
104+
heart_box = ptg.Container(ptg.Label(f"I{HEART_VS16}U"), static_width=current_width)
105+
tech_box = ptg.Container(ptg.Label(f"{WOMAN_TECH}codes"), static_width=current_width)
106+
lorem_box = ptg.Container(ptg.Label("Lorem ipsum dolor sit"), static_width=current_width)
107+
choochoo_box = ptg.Container(ptg.Label(CHOOCHOO), static_width=current_width)
108+
109+
adjustable_containers = [
110+
sgr_box, colors_box, c256_box, rgb_box,
111+
wrap_box, cjk_box, family_box, flag_box,
112+
combine_box, skin_box, heart_box, tech_box,
113+
lorem_box, choochoo_box,
114+
]
115+
116+
left = ptg.Container(
117+
ptg.Label("[bold]ANSI Sequences[/]"),
118+
"",
119+
ptg.Label("SGR:"),
120+
sgr_box,
121+
"",
122+
ptg.Label("Colors:"),
123+
colors_box,
124+
"",
125+
ptg.Label("256-color:"),
126+
c256_box,
127+
"",
128+
ptg.Label("24-bit RGB:"),
129+
rgb_box,
130+
static_width=column_width,
131+
)
132+
133+
middle = ptg.Container(
134+
ptg.Label("[bold]Width & Graphemes[/]"),
135+
"",
136+
ptg.Label("Word wrap:"),
137+
wrap_box,
138+
"",
139+
ptg.Label("CJK (2-cell):"),
140+
cjk_box,
141+
"",
142+
ptg.Label("Family ZWJ:"),
143+
family_box,
144+
"",
145+
ptg.Label("Flag:"),
146+
flag_box,
147+
static_width=column_width,
148+
)
149+
150+
right = ptg.Container(
151+
ptg.Label("[bold]Grapheme Clusters[/]"),
152+
"",
153+
ptg.Label("Combining:"),
154+
combine_box,
155+
"",
156+
ptg.Label("Skin tone:"),
157+
skin_box,
158+
"",
159+
ptg.Label("VS-16 heart:"),
160+
heart_box,
161+
"",
162+
ptg.Label("Woman tech:"),
163+
tech_box,
164+
static_width=column_width,
165+
)
166+
167+
far_right = ptg.Container(
168+
ptg.Label("[bold]Long Text[/]"),
169+
"",
170+
ptg.Label("Lorem ipsum:"),
171+
lorem_box,
172+
"",
173+
ptg.Label("RGB gradient:"),
174+
choochoo_box,
175+
static_width=column_width,
176+
)
177+
178+
column_containers.extend([left, middle, right, far_right])
179+
180+
width_label = ptg.Label(f"[dim]Width: {current_width} | ←/→ adjust | Ctrl+C exit[/]")
181+
182+
main_window = ptg.Window(
183+
ptg.Label("[bold]break_line() Demo[/]"),
184+
"",
185+
ptg.Splitter(left, middle, right, far_right),
186+
"",
187+
width_label,
188+
width=113,
189+
)
190+
manager.add(main_window)
191+
manager.run()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ description = """\
1313
license = {text = "MIT"}
1414

1515
requires-python = ">=3.8"
16-
dependencies = ["wcwidth", "typing_extensions"]
16+
dependencies = ["wcwidth>=0.5", "typing_extensions"]
1717

1818
keywords = [
1919
"terminal",

pytermgui/helpers.py

Lines changed: 21 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -4,110 +4,57 @@
44

55
from typing import Iterator
66

7-
from .markup import tokenize_ansi
8-
from .markup.parsing import LINK_TEMPLATE, PARSERS
7+
from wcwidth import wrap as wcwidth_wrap
8+
99
from .regex import real_length
1010

1111
__all__ = [
1212
"break_line",
1313
]
1414

1515

16-
def break_line( # pylint: disable=too-many-branches
16+
def break_line(
1717
line: str, limit: int, non_first_limit: int | None = None, fill: str | None = None
1818
) -> Iterator[str]:
1919
"""Breaks a line into pieces of at most `limit` length each, yielding them one by one.
2020
21-
It keeps ongoing ANSI sequences between lines, and inserts a reset sequence
22-
at the end of each style-containing line.
23-
24-
At the moment it splits strings exactly on the limit, and not on word
25-
boundaries. That functionality would be preferred, so it will end up being
26-
implemented at some point.
21+
Uses wcwidth.wrap() for proper word-boundary breaking, grapheme cluster
22+
handling, and wide character support. ANSI sequences are preserved and
23+
propagated across line breaks.
2724
2825
Args:
2926
line: The line to split. May or may not contain ANSI sequences.
3027
limit: The maximum display width (in terminal cells) allowed in each line, excluding
3128
non-printing sequences.
3229
non_first_limit: The limit after the first line. If not given, defaults
3330
to `limit`.
31+
fill: Optional character to pad lines to the limit width.
3432
"""
3533

3634
if line in ["", "\x1b[0m"]:
3735
yield ""
3836
return
3937

40-
def _pad_and_link(line: str, link: str | None) -> str:
41-
count = limit - real_length(line)
42-
43-
if link is not None:
44-
line = LINK_TEMPLATE.format(uri=link, label=line)
45-
38+
def _pad_line(text: str, width: int) -> str:
4639
if fill is None:
47-
return line
48-
49-
line += count * fill
50-
51-
return line
40+
return text
5241

53-
used = 0
54-
current = ""
55-
sequences = ""
42+
count = width - real_length(text)
43+
if count > 0:
44+
return text + count * fill
45+
return text
5646

5747
if non_first_limit is None:
5848
non_first_limit = limit
5949

60-
parsers = PARSERS
61-
link = None
62-
63-
for token in tokenize_ansi(line):
64-
if token.is_plain():
65-
for char in token.value:
66-
if char == "\n" or used >= limit:
67-
if sequences != "":
68-
current += "\x1b[0m"
69-
70-
yield _pad_and_link(current, link)
71-
link = None
72-
73-
current = sequences
74-
used = 0
75-
76-
limit = non_first_limit
77-
78-
if char != "\n":
79-
current += char
80-
used += 1
81-
82-
# If the link wasn't yielded along with its token, remove and add it
83-
# to current manually.
84-
if link is not None:
85-
current = current[: -len(token.value)]
86-
current += LINK_TEMPLATE.format(uri=link, label=token.value)
87-
link = None
88-
89-
continue
90-
91-
if token.value == "/":
92-
sequences = "\x1b[0m"
93-
94-
if len(current) > 0:
95-
current += sequences
96-
50+
for segment in line.split("\n"):
51+
if not segment:
52+
yield _pad_line("", limit)
53+
limit = non_first_limit
9754
continue
9855

99-
if token.is_hyperlink():
100-
link = token.value
101-
continue
102-
103-
sequence = parsers[type(token)](token, {}, lambda: line) # type: ignore
104-
sequences += sequence
105-
current += sequence
106-
107-
if current == "":
108-
return
109-
110-
if sequences != "" and not current.endswith("\x1b[0m"):
111-
current += "\x1b[0m"
56+
wrapped = wcwidth_wrap(segment, limit)
11257

113-
yield _pad_and_link(current, link)
58+
for wrapped_line in wrapped:
59+
yield _pad_line(wrapped_line, limit)
60+
limit = non_first_limit

pytermgui/widgets/containers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
from __future__ import annotations
88

9-
from itertools import zip_longest
109
from typing import Any, Callable, Iterator, cast
1110

1211
from ..ansi_interface import MouseAction, MouseEvent, clear, reset
@@ -1030,6 +1029,7 @@ def get_lines(self) -> list[str]: # pylint: disable=too-many-locals
10301029

10311030
self.positioned_line_buffer = []
10321031
vertical_lines = []
1032+
column_widths = []
10331033
total_offset = 0
10341034

10351035
for widget in self._widgets:
@@ -1043,6 +1043,8 @@ def get_lines(self) -> list[str]: # pylint: disable=too-many-locals
10431043
width = widget.width
10441044
error = 0
10451045

1046+
column_widths.append(width)
1047+
10461048
aligned: str | None = None
10471049
for line in widget.get_lines():
10481050
# See `enums.py` for information about this ignore
@@ -1073,8 +1075,16 @@ def get_lines(self) -> list[str]: # pylint: disable=too-many-locals
10731075

10741076
vertical_lines.append(inner)
10751077

1078+
# Pad columns to max height using each column's actual width.
1079+
# (target_width is mutated above, so we can't use it as a fillvalue)
1080+
max_height = max(len(col) for col in vertical_lines) if vertical_lines else 0
1081+
for i, col in enumerate(vertical_lines):
1082+
fill = " " * column_widths[i]
1083+
while len(col) < max_height:
1084+
col.append(fill)
1085+
10761086
lines = []
1077-
for horizontal in zip_longest(*vertical_lines, fillvalue=" " * target_width):
1087+
for horizontal in zip(*vertical_lines):
10781088
lines.append((reset() + separator).join(horizontal))
10791089

10801090
self.height = max(widget.height for widget in self)

0 commit comments

Comments
 (0)