Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions Lib/test/test_tools/i18n_data/ascii-escapes.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#. Special characters that are always escaped in the POT file
#: escapes.py:5
msgid ""
"\"\t\n"
"\r\\"
msgstr ""

#. All ascii characters 0-31
#: escapes.py:8
msgid ""
"\000\001\002\003\004\005\006\007\010\t\n"
"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
msgstr ""

#. All ascii characters 32-126
#: escapes.py:13
msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
msgstr ""

#. ascii char 127
#: escapes.py:17
msgid "\177"
msgstr ""

#. characters 128-255
#: escapes.py:20
msgid "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
msgstr ""

#. some characters >= 256
#: escapes.py:26
msgid "ě š č ř α β γ δ ㄱ ㄲ ㄴ ㄷ"
msgstr ""

34 changes: 26 additions & 8 deletions Lib/test/test_tools/i18n_data/escapes.pot
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,37 @@ msgstr ""
"Generated-By: pygettext.py 1.5\n"


#: escapes.py:4
msgid "ascii"
#. Special characters that are always escaped in the POT file
#: escapes.py:5
msgid ""
"\"\t\n"
"\r\\"
msgstr ""

#: escapes.py:6
msgid "\304\233 \305\241 \304\215 \305\231"
#. All ascii characters 0-31
#: escapes.py:8
msgid ""
"\000\001\002\003\004\005\006\007\010\t\n"
"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037"
msgstr ""

#: escapes.py:8
msgid "\316\261 \316\262 \316\263 \316\264"
#. All ascii characters 32-126
#: escapes.py:13
msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
msgstr ""

#. ascii char 127
#: escapes.py:17
msgid "\177"
msgstr ""

#. characters 128-255
#: escapes.py:20
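msgid "\302\200\302\201\302\202\302\203\302\204\302\205\302\206\302\207\302\210\302\211\302\212\302\213\302\214\302\215\302\216\302\217\302\220\302\221\302\222\302\223\302\224\302\225\302\226\302\227\302\230\302\231\302\232\302\233\302\234\302\235\302\236\302\237\302\240\302\241\302\242\302\243\302\244\302\245\302\246\302\247\302\250\302\251\302\252\302\253\302\254\302\255\302\256\302\257\302\260\302\261\302\262\302\263\302\264\302\265\302\266\302\267\302\270\302\271\302\272\302\273\302\274\302\275\302\276\302\277\303\200\303\201\303\202\303\203\303\204\303\205\303\206\303\207\303\210\303\211\303\212\303\213\303\214\303\215\303\216\303\217\303\220\303\221\303\222\303\223\303\224\303\225\303\226\303\227\303\230\303\231\303\232\303\233\303\234\303\235\303\236\303\237\303\240\303\241\303\242\303\243\303\244\303\245\303\246\303\247\303\250\303\251\303\252\303\253\303\254\303\255\303\256\303\257\303\260\303\261\303\262\303\263\303\264\303\265\303\266\303\267\303\270\303\271\303\272\303\273\303\274\303\275\303\276\303\277"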
msgstr ""

#: escapes.py:10
msgid "\343\204\261 \343\204\262 \343\204\264 \343\204\267"
#. some characters >= 256
#: escapes.py:26
msgid "\304\233 \305\241 \304\215 \305\231 \316\261 \316\262 \316\263 \316\264 \343\204\261 \343\204\262 \343\204\264 \343\204\267"
msgstr ""

24 changes: 20 additions & 4 deletions Lib/test/test_tools/i18n_data/escapes.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import gettext as _


_('ascii')
# Special characters that are always escaped in the POT file
_('"\t\n\r\\')

_('ě š č ř')
# All ascii characters 0-31
_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n'
'\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15'
'\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f')

_('α β γ δ')
# All ascii characters 32-126
_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')

_('ㄱ ㄲ ㄴ ㄷ')
# ascii char 127
_('\x7f')

# characters 128-255
_('\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90'
'\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0'
'¡¢£¤¥¦§¨©ª«¬\xad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞ'
'ßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ')

# some characters >= 256
_('ě š č ř α β γ δ ㄱ ㄲ ㄴ ㄷ')
4 changes: 0 additions & 4 deletions Lib/test/test_tools/i18n_data/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,3 @@ msgid_plural "worlds"
msgstr[0] ""
msgstr[1] ""

#: messages.py:122
msgid "α β γ δ"
msgstr ""

3 changes: 0 additions & 3 deletions Lib/test/test_tools/i18n_data/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,3 @@ def _(x="don't extract me"):
# f-strings
f"Hello, {_('world')}!"
f"Hello, {ngettext('world', 'worlds', 3)}!"

# non-ascii
_("α β γ δ")
123 changes: 13 additions & 110 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@


with imports_under_tool("i18n"):
import pygettext
from pygettext import make_escapes, parse_spec
from pygettext import parse_spec


def normalize_POT_file(pot):
Expand Down Expand Up @@ -518,110 +517,6 @@ def test_parse_keyword_spec(self):
self.assertEqual(str(cm.exception), message)


class TestCharacterEscapes(unittest.TestCase):
    """Exercise ``pygettext.escape`` with and without the --escape option.

    Each test rebuilds the escape table through ``make_escapes`` before
    asserting, and ``tearDownClass`` rebuilds it once more with
    ``pass_nonascii=True`` when the class is done.
    """

    # Pygettext always escapes the following characters:
    special_chars = {
        '\\': r'\\',
        '\t': r'\t',
        '\r': r'\r',
        '\n': r'\n',
        '\"': r'\"',
    }

    @classmethod
    def tearDownClass(cls):
        # Reset the global 'escapes' dict to the default
        make_escapes(pass_nonascii=True)

    def test_special_chars(self):
        # special_chars are always escaped regardless of the
        # --escape option
        for pass_nonascii in (True, False):
            make_escapes(pass_nonascii=pass_nonascii)
            with self.subTest(pass_nonascii=pass_nonascii):
                for char, expected in self.special_chars.items():
                    self.assertEqual(pygettext.escape(char, encoding='utf-8'),
                                     expected)

    def _char_to_octal_escape(self, char):
        """Convert a character to its octal escape representation."""
        return r"\%03o" % ord(char)

    def _octal_escape_to_string(self, escaped):
        """Convert an octal escape representation to string.

        Every three-digit octal escape in *escaped* is read as one byte
        and the resulting byte string is decoded as UTF-8.
        """
        octal_escapes = re.findall(r'\\([0-7]{3})', escaped)
        bytestr = bytes(int(n, 8) for n in octal_escapes)
        return bytestr.decode('utf-8')

    def test_not_escaped(self):
        """
        Test escaping when --escape is not used.

        When --escape is not used, only some characters within the ASCII
        range are escaped. Characters >= 128 are not escaped.
        """
        # This is the same as invoking pygettext without
        # the --escape option (the default behavior).
        make_escapes(pass_nonascii=True)
        # The encoding option is not used when --escape is not passed
        encoding = 'foo'

        # First 32 characters use octal escapes (except for special chars)
        for i in range(32):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding),
                             self._char_to_octal_escape(char))

        # Characters 32-126 are not escaped (except for special chars)
        for i in range(32, 127):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

        # chr(127) uses octal escape
        self.assertEqual(pygettext.escape(chr(127), encoding=encoding),
                         '\\177')

        # All characters >= 128 are not escaped
        for i in range(128, 256):
            char = chr(i)
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

    def test_escaped(self):
        """
        Test escaping when --escape is used.

        When --escape is used, characters >= 127 are escaped as well:
        their octal escapes must decode back (as UTF-8 bytes) to the
        original character. Printable ASCII 32-126 still passes through
        unescaped, apart from the special characters.
        """
        make_escapes(pass_nonascii=False)
        encoding = 'utf-8'

        # First 32 characters use octal escapes (except for special chars)
        for i in range(32):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding),
                             self._char_to_octal_escape(char))

        # Characters 32-126 are not escaped (except for special chars)
        for i in range(32, 127):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

        # Characters >= 127 are escaped
        for i in range(127, 256):
            char = chr(i)
            escaped = pygettext.escape(char, encoding=encoding)
            decoded_char = self._octal_escape_to_string(escaped)
            self.assertEqual(char, decoded_char)


def extract_from_snapshots():
snapshots = {
'messages.py': (),
Expand All @@ -631,13 +526,21 @@ def extract_from_snapshots():
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
# Test escaping non-ASCII characters
'escapes.py': ('--escape',),
# == Test character escaping
# Escape ascii and unicode:
'escapes.py': ('--escape', '--add-comments='),
# Escape only ascii and let unicode pass through:
('escapes.py', 'ascii-escapes.pot'): ('--add-comments=',),
}

for filename, args in snapshots.items():
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
if isinstance(filename, tuple):
filename, output_file = filename
output_file = DATA_DIR / output_file
input_file = DATA_DIR / filename
else:
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
Expand Down
Loading