Skip to content

Commit 5c49c44

Browse files
committed
Merge branch 'fix/wcwidth' into python-slip39
2 parents c1c2c79 + 2692e1a commit 5c49c44

File tree

2 files changed

+65
-10
lines changed

2 files changed

+65
-10
lines changed

tabulate/__init__.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,7 +1638,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
16381638
return rows, headers, headers_pad
16391639

16401640

1641-
def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS):
1641+
def _wrap_text_to_colwidths(
1642+
list_of_lists,
1643+
colwidths,
1644+
numparses=True,
1645+
break_long_words=_BREAK_LONG_WORDS,
1646+
break_on_hyphens=_BREAK_ON_HYPHENS,
1647+
):
16421648
if len(list_of_lists):
16431649
num_cols = len(list_of_lists[0])
16441650
else:
@@ -1655,7 +1661,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long
16551661
continue
16561662

16571663
if width is not None:
1658-
wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens)
1664+
wrapper = _CustomTextWrap(
1665+
width=width,
1666+
break_long_words=break_long_words,
1667+
break_on_hyphens=break_on_hyphens,
1668+
)
16591669
casted_cell = str(cell)
16601670
wrapped = [
16611671
"\n".join(wrapper.wrap(line))
@@ -2258,7 +2268,11 @@ def tabulate(
22582268

22592269
numparses = _expand_numparse(disable_numparse, num_cols)
22602270
list_of_lists = _wrap_text_to_colwidths(
2261-
list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2271+
list_of_lists,
2272+
maxcolwidths,
2273+
numparses=numparses,
2274+
break_long_words=break_long_words,
2275+
break_on_hyphens=break_on_hyphens,
22622276
)
22632277

22642278
if maxheadercolwidths is not None:
@@ -2272,7 +2286,11 @@ def tabulate(
22722286

22732287
numparses = _expand_numparse(disable_numparse, num_cols)
22742288
headers = _wrap_text_to_colwidths(
2275-
[headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2289+
[headers],
2290+
maxheadercolwidths,
2291+
numparses=numparses,
2292+
break_long_words=break_long_words,
2293+
break_on_hyphens=break_on_hyphens,
22762294
)[0]
22772295

22782296
# empty values in the first column of RST tables should be escaped (issue #82)
@@ -2737,15 +2755,17 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
27372755
space_left = width - cur_len
27382756

27392757
# If we're allowed to break long words, then do so: put as much
2740-
# of the next chunk onto the current line as will fit.
2741-
if self.break_long_words:
2758+
# of the next chunk onto the current line as will fit. Be careful
2759+
# of empty chunks after ANSI codes are removed.
2760+
chunk = reversed_chunks[-1]
2761+
chunk_noansi = _strip_ansi(chunk)
2762+
if self.break_long_words and chunk_noansi:
27422763
# Tabulate Custom: Build the string up piece-by-piece in order to
27432764
# take each character's width into account
2744-
chunk = reversed_chunks[-1]
2745-
i = 1
27462765
# Only count printable characters, so strip_ansi first, index later.
2747-
while len(_strip_ansi(chunk)[:i]) <= space_left:
2748-
i = i + 1
2766+
for i in range(1, len(chunk_noansi) + 1):
2767+
if self._len(chunk_noansi[:i]) > space_left:
2768+
break
27492769
# Consider escape codes when breaking words up
27502770
total_escape_len = 0
27512771
last_group = 0

test/test_textwrapper.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,41 @@ def test_wrap_color_line_longword():
176176
assert_equal(expected, result)
177177

178178

179+
def test_wrap_color_line_longword_zerowidth():
180+
"""Lines with zero-width symbols (e.g. accents) must keep those symbols with the preceding symbol.
181+
Let's exercise the calculation where the available symbols never satisfy the available width,
182+
and ensure chunk calculation succeeds and ANSI colors are maintained.
183+
184+
Most combining marks combine with the preceding character (even in right-to-left alphabets):
185+
- "e\u0301" → "é" (e + combining acute accent)
186+
- "a\u0308" → "ä" (a + combining diaeresis)
187+
- "n\u0303" → "ñ" (n + combining tilde)
188+
Enclosing Marks: Some combining marks enclose the base character:
189+
- "A\u20DD" → Ⓐ Combining enclosing circle
190+
Multiple Combining Marks: You can stack multiple combining marks on a single base character:
191+
- "e\u0301\u0308" → e with both acute accent and diaeresis
192+
Zero-width space → "ab" with an invisible zero-width space between the letters:
193+
- "a\u200Bb"
194+
195+
"""
196+
try:
197+
import wcwidth # noqa
198+
except ImportError:
199+
skip("test_wrap_wide_char is skipped")
200+
201+
# Exactly filled, with a green zero-width segment at the end.
202+
data = "This_is_A\u20DD_\033[31mte\u0301st_string_\u200bto_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m"
203+
204+
expected = [
205+
"This_is_A\u20DD_\033[31mte\u0301\033[0m",
206+
"\033[31mst_string_\u200bto\033[0m",
207+
"\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m",
208+
]
209+
wrapper = CTW(width=12)
210+
result = wrapper.wrap(data)
211+
assert_equal(expected, result)
212+
213+
179214
def test_wrap_color_line_multiple_escapes():
180215
data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)"
181216
expected = [

0 commit comments

Comments
 (0)