From 30e130fa5afabc1e0338f750c0b968f8062ab794 Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Tue, 23 Sep 2025 09:47:32 +0800 Subject: [PATCH 1/3] fix: zero-width word paste can be wrong in default repl Signed-off-by: yihong0618 --- Lib/_pyrepl/utils.py | 3 +++ Lib/test/test_pyrepl/test_utils.py | 22 ++++++++++++++++++- ...-09-23-09-46-46.gh-issue-139246.pzfM-w.rst | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 64708e843b685b..962a9a33b9f254 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -63,6 +63,9 @@ class ColorSpan(NamedTuple): def str_width(c: str) -> int: if ord(c) < 128: return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c) or unicodedata.category(c) == "Cf": + return 0 w = unicodedata.east_asian_width(c) if w in ("N", "Na", "H", "A"): return 1 diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 05a4f329059835..9b83d4a040b4fa 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -5,10 +5,28 @@ class TestUtils(TestCase): def test_str_width(self): - characters = ['a', '1', '_', '!', '\x1a', '\u263A', '\uffb9'] + characters = [ + 'a', + '1', + '_', + '!', + '\x1a', + '\u263A', + '\uffb9', + '\N{LATIN SMALL LETTER E WITH ACUTE}', # é + '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + ] for c in characters: self.assertEqual(str_width(c), 1) + zero_width_characters = [ + '\N{COMBINING ACUTE ACCENT}', + '\N{ZERO WIDTH JOINER}', + ] + for c in zero_width_characters: + with self.subTest(character=c): + self.assertEqual(str_width(c), 0) + characters = [chr(99989), chr(99999)] for c in characters: self.assertEqual(str_width(c), 2) @@ -25,6 +43,8 @@ def test_wlen(self): self.assertEqual(wlen('hello'), 5) self.assertEqual(wlen('hello' + '\x1a'), 7) + 
self.assertEqual(wlen('e\N{COMBINING ACUTE ACCENT}'), 1) + self.assertEqual(wlen('a\N{ZERO WIDTH JOINER}b'), 2) def test_prev_next_window(self): def gen_normal(): diff --git a/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst new file mode 100644 index 00000000000000..a816bda5cfe8e8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-09-23-09-46-46.gh-issue-139246.pzfM-w.rst @@ -0,0 +1 @@ +Fix incorrect display width calculation for zero-width characters (such as combining marks and the zero-width joiner) pasted into the default REPL. From 07aa4bcc8a3d590c0096f977aa02fe4f999493d7 Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Wed, 24 Sep 2025 08:14:27 +0800 Subject: [PATCH 2/3] fix: address comments Signed-off-by: yihong0618 --- Lib/_pyrepl/utils.py | 7 ++++++- Lib/test/test_pyrepl/test_utils.py | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 962a9a33b9f254..c75b7b9bc494b0 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -64,7 +64,12 @@ def str_width(c: str) -> int: if ord(c) < 128: return 1 # gh-139246 for zero-width joiner and combining characters - if unicodedata.combining(c) or unicodedata.category(c) == "Cf": + category = unicodedata.category(c) + if unicodedata.combining(c): + return 0 + if category == "Cf" and c != "\u00ad": + return 0 + if "\u2028" <= c <= "\u2029": return 0 w = unicodedata.east_asian_width(c) if w in ("N", "Na", "H", "A"): diff --git a/Lib/test/test_pyrepl/test_utils.py b/Lib/test/test_pyrepl/test_utils.py index 9b83d4a040b4fa..fabe038fb6ff28 100644 --- a/Lib/test/test_pyrepl/test_utils.py +++ b/Lib/test/test_pyrepl/test_utils.py @@ -15,6 +15,7 @@ def test_str_width(self): '\uffb9', '\N{LATIN SMALL LETTER E WITH ACUTE}', # é '\N{LATIN SMALL LETTER E WITH CEDILLA}', # ȩ + '\u00ad', ] for c in characters: self.assertEqual(str_width(c), 1) @@ -22,6 +23,8 @@ def test_str_width(self): zero_width_characters = [ '\N{COMBINING ACUTE ACCENT}', '\N{ZERO WIDTH
JOINER}', + '\u2028', + '\u2029', ] for c in zero_width_characters: with self.subTest(character=c): From 41d2865a6db479603daba831c5b9cc9180d4d222 Mon Sep 17 00:00:00 2001 From: yihong Date: Wed, 24 Sep 2025 22:18:11 +0800 Subject: [PATCH 3/3] apply suggestion Lib/_pyrepl/utils.py Co-authored-by: grayjk --- Lib/_pyrepl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index c75b7b9bc494b0..e2dca55a183464 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -64,9 +64,9 @@ def str_width(c: str) -> int: if ord(c) < 128: return 1 # gh-139246 for zero-width joiner and combining characters - category = unicodedata.category(c) if unicodedata.combining(c): return 0 + category = unicodedata.category(c) if category == "Cf" and c != "\u00ad": return 0 if "\u2028" <= c <= "\u2029":