python · StanFromIreland · Feb 28, 2025 · Feb 28, 2025 · Feb 28, 2025 · Mar 1, 2025
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
@@ -525,7 +525,7 @@ def test_normalize_multiline(self):
         s = 'multi-line\n translation'
         s_expected = '""\n"multi-line\\n"\n" translation"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_wrap(self):
@@ -534,9 +534,9 @@ def test_normalize_wrap(self):
         make_escapes(True)
 
         s = 'this string should be wrapped to 30 chars'
-        s_expected = '""\n"this string should be wrapped "\n"to 30 chars"'
+        s_expected = '""\n"this string should be "\n"wrapped to 30 chars"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_nostr(self):
@@ -547,7 +547,7 @@ def test_normalize_nostr(self):
         s = ''
         s_expected = '""'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
     def test_normalize_short_width(self):
@@ -558,7 +558,7 @@ def test_normalize_short_width(self):
         s = 'foos'
         s_expected = '"foos"'
 
-        data = normalize(s, 'UTF-8', options)
+        data = normalize(s, 'UTF-8', 'msgid', options)
         self.assertEqual(s_expected, data)
 
 

@@ -213,39 +213,43 @@ def escape_ascii(s, encoding):
 def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
-# Split a string according to whitespaces and keep
-# the whitespaces in the resulting array thanks to
-# the capturing group.
-_space_splitter = re.compile(r'(\s+)').split
 
-def normalize(s, encoding, options):
+# Tokenize a line into its leading whitespace (if any) followed by words,
+# each keeping its trailing whitespace, so ''.join(tokens) == line.
+_space_splitter = re.compile(r'\s+|\S+\s*')
+
+def normalize(s, encoding, prefix, options):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style,
     # while wrapping to options.width.
     lines = []
     for line in s.splitlines(True):
-        if len(escape(line, encoding)) > options.width and ' ' in line: # don't wrap single words
-            words = _space_splitter(line)
+        # Unwrapped, the string shares its line with `prefix` and two quotes.
+        escaped_line = escape(line, encoding)
+        if len(escaped_line) + len(prefix) + 2 > options.width:
+            words = _space_splitter.findall(line)
             words.reverse()
             buf = []
             size = 0
             while words:
                 word = words.pop()
-                escaped_word_len = len(escape(word, encoding))
+                escaped_word = escape(word, encoding)
+                escaped_word_len = len(escaped_word)
                 new_size = size + escaped_word_len
-                if new_size <= options.width:
-                    buf.append(word)
+                # +2 for the quotes; an over-long word goes on its own line.
+                if not buf or new_size + 2 <= options.width:
+                    buf.append(escaped_word)
                     size = new_size
                 else:
                     lines.append(''.join(buf))
-                    buf = [word]
+                    buf = [escaped_word]
                     size = escaped_word_len
             lines.append(''.join(buf))
         else:
-            lines.append(line)
+            lines.append(escaped_line)
     if len(lines) <= 1:
         return f'"{escape(s, encoding)}"'
-    return '""\n' + '\n'.join(f'"{escape(line, encoding)}"' for line in lines)
+    return '""\n' + '\n'.join(f'"{line}"' for line in lines)
 
 
 def containsAny(str, set):
@@ -636,10 +640,10 @@ def write_pot_file(messages, options, fp):
             # to skip translating some unimportant docstrings.
             print('#, docstring', file=fp)
         if msg.msgctxt is not None:
-            print('msgctxt', normalize(msg.msgctxt, encoding, options), file=fp)
-        print('msgid', normalize(msg.msgid, encoding, options), file=fp)
+            print('msgctxt', normalize(msg.msgctxt, encoding, 'msgctxt', options), file=fp)
+        print('msgid', normalize(msg.msgid, encoding, 'msgid', options), file=fp)
         if msg.msgid_plural is not None:
-            print('msgid_plural', normalize(msg.msgid_plural, encoding, options), file=fp)
+            print('msgid_plural', normalize(msg.msgid_plural, encoding, 'msgid_plural', options), file=fp)
             print('msgstr[0] ""', file=fp)
             print('msgstr[1] ""\n', file=fp)
         else: