Add pure Python wcwidth util (#512)

Secrus · web-flow · commit 544d081360ae · 2025-10-17T14:49:39.000+02:00
diff --git a/src/cleo/_utils.py b/src/cleo/_utils.py
@@ -1,9 +1,11 @@
 from __future__ import annotations
 
 import math
+import unicodedata
 
 from dataclasses import dataclass
 from difflib import SequenceMatcher
+from functools import lru_cache
 from html.parser import HTMLParser
 
 
@@ -105,3 +107,56 @@ def format_time(secs: float) -> str:
         (fmt for fmt in _TIME_FORMATS if secs < fmt.threshold), _TIME_FORMATS[-1]
     )
     return time_format.apply(secs)
+
+
+@lru_cache(100)
+def wcwidth(c: str) -> int:
+    """Return the number of terminal columns occupied by the character ``c``.
+
+    The result is:
+
+    * ``-1`` for control characters (general category ``Cc``) other than NUL,
+    * ``0`` for NUL, a few invisible format/separator code points, and
+      enclosing or non-spacing combining marks,
+    * ``2`` for East Asian full-width and wide characters,
+    * ``1`` for everything else.
+    """
+    codepoint = ord(c)
+
+    # Printable ASCII is by far the most common input; answer it without
+    # touching the Unicode database.
+    if 0x20 <= codepoint < 0x7F:
+        return 1
+
+    # Inclusive code point ranges that are rendered with no width at all:
+    # NUL plus some Cf/Zl/Zp characters.
+    zero_width_ranges = (
+        (0x0000, 0x0000),
+        (0x200B, 0x200F),
+        (0x2028, 0x202E),
+        (0x2060, 0x2063),
+    )
+    if any(first <= codepoint <= last for first, last in zero_width_ranges):
+        return 0
+
+    general_category = unicodedata.category(c)
+
+    if general_category == "Cc":
+        # Control characters are not printable.
+        columns = -1
+    elif general_category in ("Me", "Mn"):
+        # Enclosing and non-spacing marks are treated as zero-width.
+        columns = 0
+    elif unicodedata.east_asian_width(c) in ("F", "W"):
+        # Full-width and wide East Asian characters span two columns.
+        columns = 2
+    else:
+        columns = 1
+    return columns
+
+
+def wcswidth(s: str) -> int:
+    """Return the number of terminal columns needed to display the string ``s``.
+
+    The string is NFC-normalized and the per-character widths reported by
+    ``wcwidth`` are summed. As soon as a character with a negative width
+    (i.e. a non-printable one) is encountered, -1 is returned instead.
+    """
+    total = 0
+    # ``map`` is lazy, so the scan stops at the first non-printable character.
+    for columns in map(wcwidth, unicodedata.normalize("NFC", s)):
+        if columns < 0:
+            return -1
+        total += columns
+    return total
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -5,6 +5,8 @@
 from cleo._utils import find_similar_names
 from cleo._utils import format_time
 from cleo._utils import strip_tags
+from cleo._utils import wcswidth
+from cleo._utils import wcwidth
 
 
 @pytest.mark.parametrize(
@@ -45,3 +47,38 @@ def test_find_similar_names(name: str, expected: list[str]) -> None:
 )
 def test_strip_tags(value: str, expected: str) -> None:
     assert strip_tags(value) == expected
+
+
+@pytest.mark.parametrize(
+    ("c", "expected"),
+    [
+        # NUL is explicitly listed as zero-width by wcwidth().
+        ("\0", 0),
+        # Newline is a control character, so it is reported as not printable.
+        ("\n", -1),
+        # Printable ASCII takes one column.
+        ("a", 1),
+        ("1", 1),
+        # A non-ASCII letter that is expected to take a single column.
+        ("א", 1),
+        # U+200B lies in the explicit zero-width range 0x200B-0x200F.
+        ("\u200b", 0),
+        # Expected to be zero-width; presumably combining marks (Me/Mn).
+        ("\u1abe", 0),
+        ("\u0591", 0),
+        # Expected to take two columns; presumably East Asian wide/full-width.
+        ("🉐", 2),
+        ("＄", 2),  # noqa: RUF001
+    ],
+)
+def test_wcwidth(c: str, expected: int) -> None:
+    """Check the column width wcwidth() reports for a single character."""
+    assert wcwidth(c) == expected
+
+
+@pytest.mark.parametrize(
+    ("s", "expected"),
+    [
+        # The empty string occupies no columns.
+        ("", 0),
+        # Plain printable ASCII: one column per character.
+        ("hello, world!", 13),
+        # A single non-printable character makes the whole result -1.
+        ("hello, world!\n", -1),
+        ("0123456789", 10),
+        # Hebrew text with a comma, a space and an exclamation mark.
+        ("שלום, עולם!", 11),
+        # Hebrew with vowel points; the points are expected to add no width.
+        ("שְבֻעָיים", 6),
+        # Three characters that are each expected to take two columns.
+        ("🉐🉐🉐", 6),
+    ],
+)
+def test_wcswidth(s: str, expected: int) -> None:
+    """Check the total column width wcswidth() reports for a string."""
+    assert wcswidth(s) == expected