Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Lib/test/test_tools/i18n_data/escapes.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#: escapes.py:4
msgid "ascii"
msgstr ""

#: escapes.py:6
msgid "\304\233 \305\241 \304\215 \305\231"
msgstr ""

#: escapes.py:8
msgid "\316\261 \316\262 \316\263 \316\264"
msgstr ""

#: escapes.py:10
msgid "\343\204\261 \343\204\262 \343\204\264 \343\204\267"
msgstr ""

10 changes: 10 additions & 0 deletions Lib/test/test_tools/i18n_data/escapes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import gettext as _


_('ascii')  # plain ASCII; appears unescaped in escapes.pot

_('ě š č ř')  # Latin letters with diacritics; two octal-escaped bytes each in escapes.pot

_('α β γ δ')  # Greek letters; two octal-escaped bytes each in escapes.pot

_('ㄱ ㄲ ㄴ ㄷ')  # Hangul jamo; three octal-escaped bytes each in escapes.pot
# NOTE: escapes.pot records the line number of every call above
# (escapes.py:4, 6, 8 and 10), so keep each call on its current line.
4 changes: 4 additions & 0 deletions Lib/test/test_tools/i18n_data/messages.pot
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,7 @@ msgid_plural "worlds"
msgstr[0] ""
msgstr[1] ""

#: messages.py:122
msgid "α β γ δ"
msgstr ""

3 changes: 3 additions & 0 deletions Lib/test/test_tools/i18n_data/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,6 @@ def _(x="don't extract me"):
# f-strings
f"Hello, {_('world')}!"
f"Hello, {ngettext('world', 'worlds', 3)}!"

# non-ascii
_("α β γ δ")
109 changes: 108 additions & 1 deletion Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@


with imports_under_tool("i18n"):
from pygettext import parse_spec
import pygettext
from pygettext import make_escapes, parse_spec


def normalize_POT_file(pot):
Expand Down Expand Up @@ -517,6 +518,110 @@ def test_parse_keyword_spec(self):
self.assertEqual(str(cm.exception), message)


class TestCharacterEscapes(unittest.TestCase):
    """Tests for pygettext's character escaping with and without --escape.

    ``make_escapes`` rebuilds module-level state in pygettext, so every test
    calls it explicitly before asserting, and ``tearDownClass`` restores the
    default configuration once the class has finished.
    """

    # Pygettext always escapes the following characters:
    special_chars = {
        '\\': r'\\',
        '\t': r'\t',
        '\r': r'\r',
        '\n': r'\n',
        '\"': r'\"',
    }

    @classmethod
    def tearDownClass(cls):
        # Reset the global 'escapes' dict to the default
        make_escapes(pass_nonascii=True)

    def test_special_chars(self):
        # special_chars are always escaped regardless of the
        # --escape option
        for pass_nonascii in (True, False):
            make_escapes(pass_nonascii=pass_nonascii)
            with self.subTest(pass_nonascii=pass_nonascii):
                for char, expected in self.special_chars.items():
                    self.assertEqual(
                        pygettext.escape(char, encoding='utf-8'), expected)

    def _char_to_octal_escape(self, char):
        """Convert a character to its octal escape representation."""
        return r"\%03o" % ord(char)

    def _octal_escape_to_string(self, escaped):
        """Convert an octal escape representation to string."""
        octal_escapes = re.findall(r'\\([0-7]{3})', escaped)
        bytestr = bytes([int(n, 8) for n in octal_escapes])
        return bytestr.decode('utf-8')

    def _check_ascii_range(self, encoding):
        """Check characters 0..126, whose escaping is the same in both modes.

        The caller must have configured pygettext with make_escapes() first.
        """
        # First 32 characters use octal escapes (except for special chars)
        for i in range(32):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding),
                             self._char_to_octal_escape(char))

        # Characters 32-126 are not escaped (except for special chars)
        for i in range(32, 127):
            char = chr(i)
            if char in self.special_chars:
                continue
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

    def test_not_escaped(self):
        """
        Test escaping when --escape is not used.

        When --escape is not used, only some characters within the ASCII
        range are escaped. Characters >= 128 are not escaped.
        """
        # This is the same as invoking pygettext without
        # the --escape option (the default behavior).
        make_escapes(pass_nonascii=True)
        # The encoding option is not used when --escape is not passed,
        # so a bogus codec name must not cause an error.
        encoding = 'foo'

        self._check_ascii_range(encoding)

        # chr(127) uses octal escape
        self.assertEqual(pygettext.escape(chr(127), encoding=encoding),
                         '\\177')

        # All characters >= 128 are not escaped
        for i in range(128, 256):
            char = chr(i)
            self.assertEqual(pygettext.escape(char, encoding=encoding), char)

    def test_escaped(self):
        """
        Test escaping when --escape is used.

        When --escape is used, all characters outside the 32..126 range are
        escaped, including non-ASCII characters, which are written as the
        octal escapes of their encoded bytes.
        """
        make_escapes(pass_nonascii=False)
        encoding = 'utf-8'

        self._check_ascii_range(encoding)

        # Characters >= 127 are escaped; decoding the octal escapes as
        # UTF-8 must give back the original character.
        for i in range(127, 256):
            char = chr(i)
            escaped = pygettext.escape(char, encoding=encoding)
            decoded_char = self._octal_escape_to_string(escaped)
            self.assertEqual(char, decoded_char)


def extract_from_snapshots():
snapshots = {
'messages.py': (),
Expand All @@ -526,6 +631,8 @@ def extract_from_snapshots():
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
# Test escaping non-ASCII characters
'escapes.py': ('--escape',),
}

for filename, args in snapshots.items():
Expand Down
2 changes: 1 addition & 1 deletion Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def make_escapes(pass_nonascii):
global escapes, escape
if pass_nonascii:
# Allow non-ascii characters to pass through so that e.g. 'msgid
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
escape = escape_ascii
Expand Down
Loading