python · serhiy-storchaka · Apr 2, 2025 · Mar 16, 2025 · Mar 31, 2025 · Mar 31, 2025
diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot
@@ -0,0 +1,51 @@
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR ORGANIZATION
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2000-01-01 00:00+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: pygettext.py 1.5\n"
+
+
+#. Special characters that are always escaped in the POT file
+#: escapes.py:5
+msgid ""
+"\"\t\n"
+"\r\\"
+msgstr ""
+
+#. All ascii characters 0-31
+#: escapes.py:8
+msgid ""
+"\000\001\002\003\004\005\006\007\010\t\n"
+"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
+msgstr ""
+
+#. All ascii characters 32-126
+#: escapes.py:13
+msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+msgstr ""
+
+#. ascii char 127
+#: escapes.py:17
+msgid "\177"
+msgstr ""
+
+#. characters 128-255
+#: escapes.py:20
+msgid " ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
+msgstr ""
+
+#. some characters >= 256
+#: escapes.py:26
+msgid "ě š č ř α β γ δ ㄱ ㄲ ㄴ ㄷ"
+msgstr ""
+
diff --git a/Lib/test/test_tools/i18n_data/escapes.pot b/Lib/test/test_tools/i18n_data/escapes.pot
@@ -15,19 +15,37 @@ msgstr ""
 "Generated-By: pygettext.py 1.5\n"
 
 
-#: escapes.py:4
-msgid "ascii"
+#. Special characters that are always escaped in the POT file
+#: escapes.py:5
+msgid ""
+"\"\t\n"
+"\r\\"
 msgstr ""
 
-#: escapes.py:6
-msgid "\304\233 \305\241 \304\215 \305\231"
+#. All ascii characters 0-31
+#: escapes.py:8
+msgid ""
+"\000\001\002\003\004\005\006\007\010\t\n"
+"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
 msgstr ""
 
-#: escapes.py:8
-msgid "\316\261 \316\262 \316\263 \316\264"
+#. All ascii characters 32-126
+#: escapes.py:13
+msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
+msgstr ""
+
+#. ascii char 127
+#: escapes.py:17
+msgid "\177"
+msgstr ""
+
+#. characters 128-255
+#: escapes.py:20
+msgid "\302\200\302\201\302\202\302\203\302\204\302\205\302\206\302\207\302\210\302\211\302\212\302\213\302\214\302\215\302\216\302\217\302\220\302\221\302\222\302\223\302\224\302\225\302\226\302\227\302\230\302\231\302\232\302\233\302\234\302\235\302\236\302\237\302\240\302\241\302\242\302\243\302\244\302\245\302\246\302\247\302\250\302\251\302\252\302\253\302\254\302\255\302\256\302\257\302\260\302\261\302\262\302\263\302\264\302\265\302\266\302\267\302\270\302\271\302\272\302\273\302\274\302\275\302\276\302\277\303\200\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210\303\211\303\212\303\213\303\214\303\215\303\216\303\217\303\220\303\221\303\222\303\223\303\224\303\225\303\226\303\227\303\230\303\231\303\232\303\233\303\234\303\235\303\236\303\237\303\240\303\241\303\242\303\243\303\244\303\245\303\246\303\247\303\250\303\251\303\252\303\253\303\254\303\255\303\256\303\257\303\260\303\261\303\262\303\263\303\264\303\265\303\266\303\267\303\270\303\271\303\272\303\273\303\274\303\275\303\276\303\277"
 msgstr ""
 
-#: escapes.py:10
-msgid "\343\204\261 \343\204\262 \343\204\264 \343\204\267"
+#. some characters >= 256
+#: escapes.py:26
+msgid "\304\233 \305\241 \304\215 \305\231 \316\261 \316\262 \316\263 \316\264 \343\204\261 \343\204\262 \343\204\264 \343\204\267"
 msgstr ""
 
diff --git a/Lib/test/test_tools/i18n_data/escapes.py b/Lib/test/test_tools/i18n_data/escapes.py
@@ -1,10 +1,26 @@
 import gettext as _
 
 
-_('ascii')
+# Special characters that are always escaped in the POT file
+_('"\t\n\r\\')
 
-_('ě š č ř')
+# All ascii characters 0-31
+_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
+  '\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15'
+  '\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f')
 
-_('α β γ δ')
+# All ascii characters 32-126
+_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+  '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
 
-_('ㄱ ㄲ ㄴ ㄷ')
+# ascii char 127
+_('\x7f')
+
+# characters 128-255
+_('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90'
+  '\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0'
+  '¡¢£¤¥¦§¨©ª«¬\xad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ'
+  'ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')
+
+# some characters >= 256
+_('ě š č ř α β γ δ ㄱ ㄲ ㄴ ㄷ')
diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot
@@ -97,7 +97,3 @@ msgid_plural "worlds"
 msgstr[0] ""
 msgstr[1] ""
 
-#: messages.py:122
-msgid "α β γ δ"
-msgstr ""
-
diff --git a/Lib/test/test_tools/i18n_data/messages.py b/Lib/test/test_tools/i18n_data/messages.py
@@ -117,6 +117,3 @@ def _(x="don't extract me"):
 # f-strings
 f"Hello, {_('world')}!"
 f"Hello, {ngettext('world', 'worlds', 3)}!"
-
-# non-ascii
-_("α β γ δ")
diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py
@@ -18,8 +18,7 @@
 
 
 with imports_under_tool("i18n"):
-    import pygettext
-    from pygettext import make_escapes, parse_spec
+    from pygettext import parse_spec
 
 
 def normalize_POT_file(pot):
@@ -518,110 +517,6 @@ def test_parse_keyword_spec(self):
                 self.assertEqual(str(cm.exception), message)
 
 
-class TestCharacterEscapes(unittest.TestCase):
-    # Pygettext always escapes the following characters:
-    special_chars = {
-        '\\': r'\\',
-        '\t': r'\t',
-        '\r': r'\r',
-        '\n': r'\n',
-        '\"': r'\"',
-    }
-
-    def tearDownClass():
-        # Reset the global 'escapes' dict to the default
-        make_escapes(pass_nonascii=True)
-
-    def test_special_chars(self):
-        # special_chars are always escaped regardless of the
-        # --escape option
-        for pass_nonascii in (True, False):
-            make_escapes(pass_nonascii=pass_nonascii)
-            with self.subTest(pass_nonascii=pass_nonascii):
-                for char in self.special_chars:
-                    self.assertEqual(pygettext.escape(char, encoding='utf-8'),
-                                     self.special_chars[char])
-
-    def _char_to_octal_escape(self, char):
-        """Convert a character to its octal escape representation."""
-        return r"\%03o" % ord(char)
-
-    def _octal_escape_to_string(self, escaped):
-        """Convert an octal escape representation to string."""
-        octal_escapes = re.findall(r'\\([0-7]{3})', escaped)
-        bytestr = bytes([int(n, 8) for n in octal_escapes])
-        return bytestr.decode('utf-8')
-
-    def test_not_escaped(self):
-        """
-        Test escaping when the --escape is not used.
-
-        When --escape is not used, only some characters withing the ASCII
-        range are escaoped. Characters >= 128 are not escaped.
-        """
-        # This is the same as invoking pygettext without
-        # the --escape option (the default behavior).
-        make_escapes(pass_nonascii=True)
-        # The encoding option is not used when --escape is not passed
-        encoding = 'foo'
-
-        # First 32 characters use octal escapes (except for special chars)
-        for i in range(32):
-            char = chr(i)
-            if char in self.special_chars:
-                continue
-            self.assertEqual(pygettext.escape(char, encoding=encoding),
-                             self._char_to_octal_escape(char))
-
-        # Characters 32-126 are not escaped (except for special chars)
-        for i in range(32, 127):
-            char = chr(i)
-            if char in self.special_chars:
-                continue
-            self.assertEqual(pygettext.escape(char, encoding=encoding), char)
-
-        # chr(127) uses octal escape
-        self.assertEqual(pygettext.escape(chr(127), encoding=encoding),
-                         '\\177')
-
-        # All characters >= 128 are not escaped
-        for i in range(128, 256):
-            char = chr(i)
-            self.assertEqual(pygettext.escape(char, encoding=encoding), char)
-
-
-    def test_escaped(self):
-        """
-        Test escaping when --escape is used.
-
-        When --escape is used, all characters are escaped, including
-        """
-        make_escapes(pass_nonascii=False)
-        encoding = 'utf-8'
-
-        # First 32 characters use octal escapes (except for special chars)
-        for i in range(32):
-            char = chr(i)
-            if char in self.special_chars:
-                continue
-            self.assertEqual(pygettext.escape(char, encoding=encoding),
-                             self._char_to_octal_escape(char))
-
-        # Characters 32-126 are not escaped (except for special chars)
-        for i in range(32, 127):
-            char = chr(i)
-            if char in self.special_chars:
-                continue
-            self.assertEqual(pygettext.escape(char, encoding=encoding), char)
-
-        # Characters >= 127 are escaped
-        for i in range(127, 256):
-            char = chr(i)
-            escaped = pygettext.escape(char, encoding=encoding)
-            decoded_char = self._octal_escape_to_string(escaped)
-            self.assertEqual(char, decoded_char)
-
-
 def extract_from_snapshots():
     snapshots = {
         'messages.py': (),
@@ -631,13 +526,21 @@ def extract_from_snapshots():
         'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
                                '--keyword=pfoo:1c,2',
                                '--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
-        # Test escaping non-ASCII characters
-        'escapes.py': ('--escape',),
+        # == Test character escaping
+        # Escape ascii and unicode:
+        'escapes.py': ('--escape', '--add-comments='),
+        # Escape only ascii and let unicode pass through:
+        ('escapes.py', 'ascii-escapes.pot'): ('--add-comments=',),
     }
 
     for filename, args in snapshots.items():
-        input_file = DATA_DIR / filename
-        output_file = input_file.with_suffix('.pot')
+        if isinstance(filename, tuple):
+            filename, output_file = filename
+            output_file = DATA_DIR / output_file
+            input_file = DATA_DIR / filename
+        else:
+            input_file = DATA_DIR / filename
+            output_file = input_file.with_suffix('.pot')
         contents = input_file.read_bytes()
         with temp_cwd(None):
             Path(input_file.name).write_bytes(contents)