Merge branch 'fix/wcwidth' into python-slip39

pjkundert · pjkundert · commit 5c49c44f05ad · 2025-10-27T11:27:39.000+04:00
diff --git a/tabulate/__init__.py b/tabulate/__init__.py
@@ -1638,7 +1638,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
     return rows, headers, headers_pad
 
 
-def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS):
+def _wrap_text_to_colwidths(
+    list_of_lists,
+    colwidths,
+    numparses=True,
+    break_long_words=_BREAK_LONG_WORDS,
+    break_on_hyphens=_BREAK_ON_HYPHENS,
+):
     if len(list_of_lists):
         num_cols = len(list_of_lists[0])
     else:
@@ -1655,7 +1661,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long
                 continue
 
             if width is not None:
-                wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens)
+                wrapper = _CustomTextWrap(
+                    width=width,
+                    break_long_words=break_long_words,
+                    break_on_hyphens=break_on_hyphens,
+                )
                 casted_cell = str(cell)
                 wrapped = [
                     "\n".join(wrapper.wrap(line))
@@ -2258,7 +2268,11 @@ def tabulate(
 
         numparses = _expand_numparse(disable_numparse, num_cols)
         list_of_lists = _wrap_text_to_colwidths(
-            list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
+            list_of_lists,
+            maxcolwidths,
+            numparses=numparses,
+            break_long_words=break_long_words,
+            break_on_hyphens=break_on_hyphens,
         )
 
     if maxheadercolwidths is not None:
@@ -2272,7 +2286,11 @@ def tabulate(
 
         numparses = _expand_numparse(disable_numparse, num_cols)
         headers = _wrap_text_to_colwidths(
-            [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
+            [headers],
+            maxheadercolwidths,
+            numparses=numparses,
+            break_long_words=break_long_words,
+            break_on_hyphens=break_on_hyphens,
         )[0]
 
     # empty values in the first column of RST tables should be escaped (issue #82)
@@ -2737,15 +2755,17 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
             space_left = width - cur_len
 
         # If we're allowed to break long words, then do so: put as much
-        # of the next chunk onto the current line as will fit.
-        if self.break_long_words:
+        # of the next chunk onto the current line as will fit.  Be careful
+        # of chunks that become empty once their ANSI codes are stripped.
+        chunk = reversed_chunks[-1]
+        chunk_noansi = _strip_ansi(chunk)
+        if self.break_long_words and chunk_noansi:
             # Tabulate Custom: Build the string up piece-by-piece in order to
             # take each charcter's width into account
-            chunk = reversed_chunks[-1]
-            i = 1
             # Only count printable characters, so strip_ansi first, index later.
-            while len(_strip_ansi(chunk)[:i]) <= space_left:
-                i = i + 1
+            for i in range(1, len(chunk_noansi) + 1):
+                if self._len(chunk_noansi[:i]) > space_left:
+                    break
             # Consider escape codes when breaking words up
             total_escape_len = 0
             last_group = 0
diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py
@@ -176,6 +176,41 @@ def test_wrap_color_line_longword():
     assert_equal(expected, result)
 
 
+def test_wrap_color_line_longword_zerowidth():
+    """Lines with zero-width symbols (eg. accents) must include those symbols with the prior symbol.
+    Let's exercise the calculation where the available symbols never satisfy the available width,
+    and ensure chunk calculation succeeds and ANSI colors are maintained.
+
+    Most combining marks combine with the preceding character (even in right-to-left alphabets):
+      - "e\u0301" → "é" (e + combining acute accent)
+      - "a\u0308" → "ä" (a + combining diaeresis)
+      - "n\u0303" → "ñ" (n + combining tilde)
+    Enclosing Marks: Some combining marks enclose the base character:
+      - "A\u20DD" → Ⓐ  Combining enclosing circle
+    Multiple Combining Marks: You can stack multiple combining marks on a single base character:
+      - "e\u0301\u0308" → e with both acute accent and diaeresis
+    Zero-width space: renders as "ab", with an invisible zero-width space between the letters:
+      - "a\u200Bb"
+
+    """
+    try:
+        import wcwidth  # noqa
+    except ImportError:
+        skip("test_wrap_wide_char is skipped")
+
+    # Exactly filled, with a green zero-width segment at the end.
+    data = "This_is_A\u20DD_\033[31mte\u0301st_string_\u200bto_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m"
+
+    expected = [
+        "This_is_A\u20DD_\033[31mte\u0301\033[0m",
+        "\033[31mst_string_\u200bto\033[0m",
+        "\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m",
+    ]
+    wrapper = CTW(width=12)
+    result = wrapper.wrap(data)
+    assert_equal(expected, result)
+
+
 def test_wrap_color_line_multiple_escapes():
     data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)"
     expected = [