Serhiy's suggestions

StanFromIreland · StanFromIreland · commit 794fc8b1ad3a · 2025-03-03T18:48:37.000Z
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
@@ -517,49 +517,50 @@ def test_parse_keyword_spec(self):
                     parse_spec(spec)
                 self.assertEqual(str(cm.exception), message)
 
-    def test_normalize_multiline(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=78)
-        make_escapes(True)
+    # required to set up normalize
+    make_escapes(True)
 
+    def test_normalize_multiline(self):
         s = 'multi-line\n translation'
         s_expected = '""\n"multi-line\\n"\n" translation"'
 
-        data = normalize(s, 'UTF-8', 'msgid', options)
+        data = normalize(s, 'UTF-8', 'msgid', 78)
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=30)
-        make_escapes(True)
+        s = 'fee fi fo fum fee fi '                # len = 29
+        s_expected = '"fee fi fo fum fee fi "'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
+        self.assertEqual(s_expected, data)
 
-        s = 'this string should be wrapped to 30 chars'
-        s_expected = '""\n"this string should be "\n"wrapped to 30 chars"'
+        s = 'fee fi fo fum fee fi f'               # len = 30
+        s_expected = '"fee fi fo fum fee fi f"'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
+        self.assertEqual(s_expected, data)
 
-        data = normalize(s, 'UTF-8', 'msgid', options)
+        s = 'fee fi fo fum fee fi fo'              # len = 31
+        s_expected = '""\n"fee fi fo fum fee fi fo"'
+        data = normalize(s, 'UTF-8', 'msgid', 30)
         self.assertEqual(s_expected, data)
 
     def test_normalize_nostr(self):
-        # required to set up normalize
-        options = SimpleNamespace(width=30)
-        make_escapes(True)
-
-        s = ''
-        s_expected = '""'
-
-        data = normalize(s, 'UTF-8', 'msgid', options)
-        self.assertEqual(s_expected, data)
+        data = normalize('', 'UTF-8', 'msgid', 30)
+        self.assertEqual('""', data)
 
-    def test_normalize_short_width(self):
+    def test_normalize_single_word(self):
         # required to set up normalize
-        options = SimpleNamespace(width=3)
         make_escapes(True)
-
-        s = 'foos'
-        s_expected = '"foos"'
-
-        data = normalize(s, 'UTF-8', 'msgid', options)
-        self.assertEqual(s_expected, data)
+        for s in ("fee", "fi", "fo", "fums"):
+            data = normalize(s, 'UTF-8', 'msgid', 3)
+            self.assertNotIn('""', data) # did not wrap
+
+    def test_normalize_split_on_whitespace(self):
+        for space in (' ', ' ', ' ', '\t', '\r'):
+            s = f'longlonglong{space}word'
+            space = {'\t': '\\t', '\r': '\\r'}.get(space, space)
+            s_expected = f'""\n"longlonglong{space}"\n"word"'
+            data = normalize(s, 'UTF-8', 'msgid', 10)
+            self.assertEqual(s_expected, data)
 
 
 def extract_from_snapshots():
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
@@ -155,6 +155,7 @@
 
 __version__ = '1.5'
 
+
 
 # The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
 # there.
@@ -216,24 +217,26 @@ def escape_nonascii(s, encoding):
 
 _space_splitter = re.compile(r'\s+|\S+\s*')
 
-def normalize(s, encoding, prefix, options):
+def normalize(s, encoding, prefix, width):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
+    wrap = False
     for line in s.splitlines(True):
         escaped_line = escape(line, encoding)
-        if len(escaped_line) + len(prefix) + 3 > options.width:
+        if len(escaped_line) + len(prefix) + 3 > width:
+            wrap = True
             words = _space_splitter.findall(line)
             words.reverse()
             buf = []
-            size = 0
+            size = 2
             while words:
                 word = words.pop()
                 escaped_word = escape(word, encoding)
                 escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size + 2 <= options.width or not buf:
+                if new_size <= width or not buf:
                     buf.append(escaped_word)
                     size = new_size
                 else:
@@ -243,7 +246,7 @@ def normalize(s, encoding, prefix, options):
             lines.append(''.join(buf))
         else:
             lines.append(escaped_line)
-    if len(lines) <= 1:
+    if len(lines) <= 1 and (not wrap or len(_space_splitter.findall(lines[0])) == 1):
         return f'"{escape(s, encoding)}"'
     return '""\n' + '\n'.join(f'"{line}"' for line in lines)
 
@@ -636,10 +639,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp)
-        print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp)
+            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options.width), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, 'msgid', options.width), file=fp)
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options.width), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else: