Skip to content

Commit 2692e1a

Browse files
committed
Fix and test long word wrapping
- Tests pass with/without wcwidth module installed
- Include some linting changes
1 parent a2cb29e commit 2692e1a

File tree

2 files changed

+81
-42
lines changed

2 files changed

+81
-42
lines changed

tabulate/__init__.py

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,7 +1638,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"):
16381638
return rows, headers, headers_pad
16391639

16401640

1641-
def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS):
1641+
def _wrap_text_to_colwidths(
1642+
list_of_lists,
1643+
colwidths,
1644+
numparses=True,
1645+
break_long_words=_BREAK_LONG_WORDS,
1646+
break_on_hyphens=_BREAK_ON_HYPHENS,
1647+
):
16421648
if len(list_of_lists):
16431649
num_cols = len(list_of_lists[0])
16441650
else:
@@ -1655,7 +1661,11 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long
16551661
continue
16561662

16571663
if width is not None:
1658-
wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens)
1664+
wrapper = _CustomTextWrap(
1665+
width=width,
1666+
break_long_words=break_long_words,
1667+
break_on_hyphens=break_on_hyphens,
1668+
)
16591669
casted_cell = str(cell)
16601670
wrapped = [
16611671
"\n".join(wrapper.wrap(line))
@@ -2258,7 +2268,11 @@ def tabulate(
22582268

22592269
numparses = _expand_numparse(disable_numparse, num_cols)
22602270
list_of_lists = _wrap_text_to_colwidths(
2261-
list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2271+
list_of_lists,
2272+
maxcolwidths,
2273+
numparses=numparses,
2274+
break_long_words=break_long_words,
2275+
break_on_hyphens=break_on_hyphens,
22622276
)
22632277

22642278
if maxheadercolwidths is not None:
@@ -2272,7 +2286,11 @@ def tabulate(
22722286

22732287
numparses = _expand_numparse(disable_numparse, num_cols)
22742288
headers = _wrap_text_to_colwidths(
2275-
[headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens
2289+
[headers],
2290+
maxheadercolwidths,
2291+
numparses=numparses,
2292+
break_long_words=break_long_words,
2293+
break_on_hyphens=break_on_hyphens,
22762294
)[0]
22772295

22782296
# empty values in the first column of RST tables should be escaped (issue #82)
@@ -2737,46 +2755,32 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
27372755
space_left = width - cur_len
27382756

27392757
# If we're allowed to break long words, then do so: put as much
2740-
# of the next chunk onto the current line as will fit.
2741-
2742-
# Reverted the broken ANSI code handling stuff to fix wcwidth handling
2743-
# - Doesn't use self._lend, infinite loops
2744-
# - doesn't locate chunks correctly b/c could be split by ANSI codes
2745-
#
2746-
# if self.break_long_words and space_left > 0:
2747-
# # Tabulate Custom: Build the string up piece-by-piece in order to
2748-
# # take each charcter's width into account
2749-
# chunk = reversed_chunks[-1]
2750-
# # Only count printable characters, so strip_ansi first, index later.
2751-
# for i in range( 1, space_left + 1 ):
2752-
# if self._len(_strip_ansi(chunk)[:i]) > space_left:
2753-
# break
2754-
#
2755-
# # Consider escape codes when breaking words up
2756-
# total_escape_len = 0
2757-
# last_group = 0
2758-
# if _ansi_codes.search(chunk) is not None:
2759-
# for group, _, _, _ in _ansi_codes.findall(chunk):
2760-
# escape_len = len(group)
2761-
# if (
2762-
# group
2763-
# in chunk[last_group : i + total_escape_len + escape_len - 1]
2764-
# ):
2765-
# total_escape_len += escape_len
2766-
# found = _ansi_codes.search(chunk[last_group:])
2767-
# last_group += found.end()
2768-
# cur_line.append(chunk[: i + total_escape_len - 1])
2769-
# reversed_chunks[-1] = chunk[i + total_escape_len - 1 :]
2770-
2771-
if self.break_long_words: # and space_left > 0:
2758+
# of the next chunk onto the current line as will fit. Be careful
2759+
# of empty chunks after ANSI codes removed.
2760+
chunk = reversed_chunks[-1]
2761+
chunk_noansi = _strip_ansi(chunk)
2762+
if self.break_long_words and chunk_noansi:
27722763
# Tabulate Custom: Build the string up piece-by-piece in order to
27732764
# take each character's width into account
2774-
chunk = reversed_chunks[-1]
2775-
i = 1
2776-
while self._len(chunk[:i]) <= space_left:
2777-
i = i + 1
2778-
cur_line.append(chunk[: i - 1])
2779-
reversed_chunks[-1] = chunk[i - 1 :]
2765+
# Only count printable characters, so strip_ansi first, index later.
2766+
for i in range(1, len(chunk_noansi) + 1):
2767+
if self._len(chunk_noansi[:i]) > space_left:
2768+
break
2769+
# Consider escape codes when breaking words up
2770+
total_escape_len = 0
2771+
last_group = 0
2772+
if _ansi_codes.search(chunk) is not None:
2773+
for group, _, _, _ in _ansi_codes.findall(chunk):
2774+
escape_len = len(group)
2775+
if (
2776+
group
2777+
in chunk[last_group : i + total_escape_len + escape_len - 1]
2778+
):
2779+
total_escape_len += escape_len
2780+
found = _ansi_codes.search(chunk[last_group:])
2781+
last_group += found.end()
2782+
cur_line.append(chunk[: i + total_escape_len - 1])
2783+
reversed_chunks[-1] = chunk[i + total_escape_len - 1 :]
27802784

27812785
# Otherwise, we have to preserve the long word intact. Only add
27822786
# it to the current line if there's nothing already there --

test/test_textwrapper.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,41 @@ def test_wrap_color_line_longword():
176176
assert_equal(expected, result)
177177

178178

179+
def test_wrap_color_line_longword_zerowidth():
180+
"""Lines with zero-width symbols (eg. accents) must include those symbols with the prior symbol.
181+
Let's exercise the calculation where the available symbols never satisfy the available width,
182+
and ensure chunk calculation succeeds and ANSI colors are maintained.
183+
184+
Most combining marks combine with the preceding character (even in right-to-left alphabets):
185+
- "e\u0301" → "é" (e + combining acute accent)
186+
- "a\u0308" → "ä" (a + combining diaeresis)
187+
- "n\u0303" → "ñ" (n + combining tilde)
188+
Enclosing Marks: Some combining marks enclose the base character:
189+
- "A\u20DD" → Ⓐ Combining enclosing circle
190+
Multiple Combining Marks: You can stack multiple combining marks on a single base character:
191+
- "e\u0301\u0308" → e with both acute accent and diaeresis
192+
Zero-width space → "ab" with an invisible zero-width space between the two letters:
193+
- "a\u200Bb"
194+
195+
"""
196+
try:
197+
import wcwidth # noqa
198+
except ImportError:
199+
skip("test_wrap_wide_char is skipped")
200+
201+
# Exactly filled, with a green zero-width segment at the end.
202+
data = "This_is_A\u20DD_\033[31mte\u0301st_string_\u200bto_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m"
203+
204+
expected = [
205+
"This_is_A\u20DD_\033[31mte\u0301\033[0m",
206+
"\033[31mst_string_\u200bto\033[0m",
207+
"\033[31m_te\u0301\u0308st_a\u0308ccent\033[32m\u200b\033[0m",
208+
]
209+
wrapper = CTW(width=12)
210+
result = wrapper.wrap(data)
211+
assert_equal(expected, result)
212+
213+
179214
def test_wrap_color_line_multiple_escapes():
180215
data = "012345(\x1b[32ma\x1b[0mbc\x1b[32mdefghij\x1b[0m)"
181216
expected = [

0 commit comments

Comments
 (0)