Skip to content

Commit 544d081

Browse files
authored
Add pure Python wcwidth util (#512)
1 parent 772b4b9 commit 544d081

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

src/cleo/_utils.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
from __future__ import annotations
22

33
import math
4+
import unicodedata
45

56
from dataclasses import dataclass
67
from difflib import SequenceMatcher
8+
from functools import lru_cache
79
from html.parser import HTMLParser
810

911

@@ -105,3 +107,56 @@ def format_time(secs: float) -> str:
105107
(fmt for fmt in _TIME_FORMATS if secs < fmt.threshold), _TIME_FORMATS[-1]
106108
)
107109
return time_format.apply(secs)
110+
111+
112+
@lru_cache(maxsize=100)
def wcwidth(c: str) -> int:
    """Return the number of terminal columns occupied by a single character.

    The result is -1 for control characters (Unicode category ``Cc``), 0 for
    NUL, a few hard-coded zero-width code point ranges and combining marks
    (categories ``Me``/``Mn``), 2 for East Asian Full-width/Wide characters
    and 1 for everything else.

    ``c`` must be a string of length one; anything else makes ``ord`` raise
    ``TypeError``.
    """
    codepoint = ord(c)

    # Printable ASCII never needs a Unicode database lookup.
    if 0x20 <= codepoint <= 0x7E:
        return 1

    # NUL plus a handful of Cf/Zp/Zl ranges that are treated as zero-width.
    zero_width_ranges = (
        (0x200B, 0x200F),
        (0x2028, 0x202E),
        (0x2060, 0x2063),
    )
    if codepoint == 0x0000 or any(
        first <= codepoint <= last for first, last in zero_width_ranges
    ):
        return 0

    general_category = unicodedata.category(c)

    # Control characters are reported as non-printable.
    if general_category == "Cc":
        return -1

    # Enclosing and non-spacing marks attach to the preceding character.
    if general_category in {"Me", "Mn"}:
        return 0

    # East Asian Full-width / Wide characters take two columns.
    return 2 if unicodedata.east_asian_width(c) in {"F", "W"} else 1
149+
150+
151+
def wcswidth(s: str) -> int:
    """Return the number of terminal columns needed to display a string.

    The string is normalized to NFC first, then the per-character widths
    reported by ``wcwidth`` are summed. If any character has a negative
    width (i.e. is non-printable), scanning stops and -1 is returned.
    An empty string has width 0.
    """
    columns = 0
    for char in unicodedata.normalize("NFC", s):
        char_columns = wcwidth(char)
        # A single non-printable character makes the whole string unmeasurable.
        if char_columns < 0:
            return -1
        columns += char_columns
    return columns

tests/test_utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from cleo._utils import find_similar_names
66
from cleo._utils import format_time
77
from cleo._utils import strip_tags
8+
from cleo._utils import wcswidth
9+
from cleo._utils import wcwidth
810

911

1012
@pytest.mark.parametrize(
@@ -45,3 +47,38 @@ def test_find_similar_names(name: str, expected: list[str]) -> None:
4547
)
4648
def test_strip_tags(value: str, expected: str) -> None:
4749
assert strip_tags(value) == expected
50+
51+
52+
@pytest.mark.parametrize(
    ("c", "expected"),
    [
        ("\0", 0),  # NUL is explicitly zero-width
        ("\n", -1),  # control character -> not printable
        ("a", 1),
        ("1", 1),
        ("א", 1),
        ("\u200b", 0),  # inside the hard-coded zero-width range
        ("\u1abe", 0),
        ("\u0591", 0),
        ("🉐", 2),
        # U+FF04 FULLWIDTH DOLLAR SIGN. Written as an escape so the character
        # cannot be flattened to an ASCII "$" (which has width 1, not 2).
        ("\uff04", 2),
    ],
)
def test_wcwidth(c: str, expected: int) -> None:
    """Check that ``wcwidth`` returns the expected column count for ``c``."""
    assert wcwidth(c) == expected
69+
70+
71+
@pytest.mark.parametrize(
    ("s", "expected"),
    [
        # Empty input occupies no columns.
        ("", 0),
        # Plain ASCII: one column per character.
        ("hello, world!", 13),
        # A control character makes the whole string non-printable.
        ("hello, world!\n", -1),
        ("0123456789", 10),
        # Hebrew text, without and with combining marks.
        ("שלום, עולם!", 11),
        ("שְבֻעָיים", 6),
        # Wide characters take two columns each.
        ("🉐🉐🉐", 6),
    ],
)
def test_wcswidth(s: str, expected: int) -> None:
    """Check that ``wcswidth`` returns the expected total width for ``s``."""
    actual = wcswidth(s)
    assert actual == expected

0 commit comments

Comments
 (0)