Skip to content

Commit c9f657c

Browse files
committed
Test pygettext --escape option
1 parent 46ada1e commit c9f657c

File tree

6 files changed

+159
-2
lines changed

6 files changed

+159
-2
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# SOME DESCRIPTIVE TITLE.
2+
# Copyright (C) YEAR ORGANIZATION
3+
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
4+
#
5+
msgid ""
6+
msgstr ""
7+
"Project-Id-Version: PACKAGE VERSION\n"
8+
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
9+
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
10+
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
11+
"Language-Team: LANGUAGE <[email protected]>\n"
12+
"MIME-Version: 1.0\n"
13+
"Content-Type: text/plain; charset=UTF-8\n"
14+
"Content-Transfer-Encoding: 8bit\n"
15+
"Generated-By: pygettext.py 1.5\n"
16+
17+
18+
#: escapes.py:4
19+
msgid "ascii"
20+
msgstr ""
21+
22+
#: escapes.py:6
23+
msgid "\304\233 \305\241 \304\215 \305\231"
24+
msgstr ""
25+
26+
#: escapes.py:8
27+
msgid "\316\261 \316\262 \316\263 \316\264"
28+
msgstr ""
29+
30+
#: escapes.py:10
31+
msgid "\343\204\261 \343\204\262 \343\204\264 \343\204\267"
32+
msgstr ""
33+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import gettext as _
2+
3+
4+
_('ascii')
5+
6+
_('ě š č ř')
7+
8+
_('α β γ δ')
9+
10+
_('ㄱ ㄲ ㄴ ㄷ')

Lib/test/test_tools/i18n_data/messages.pot

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,7 @@ msgid_plural "worlds"
9797
msgstr[0] ""
9898
msgstr[1] ""
9999

100+
#: messages.py:122
101+
msgid "α β γ δ"
102+
msgstr ""
103+

Lib/test/test_tools/i18n_data/messages.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,6 @@ def _(x="don't extract me"):
117117
# f-strings
118118
f"Hello, {_('world')}!"
119119
f"Hello, {ngettext('world', 'worlds', 3)}!"
120+
121+
# non-ascii
122+
_("α β γ δ")

Lib/test/test_tools/test_i18n.py

Lines changed: 108 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818

1919

2020
with imports_under_tool("i18n"):
21-
from pygettext import parse_spec
21+
import pygettext
22+
from pygettext import make_escapes, parse_spec
2223

2324

2425
def normalize_POT_file(pot):
@@ -517,6 +518,110 @@ def test_parse_keyword_spec(self):
517518
self.assertEqual(str(cm.exception), message)
518519

519520

521+
class TestCharacterEscapes(unittest.TestCase):
522+
# Pygettext always escapes the following characters:
523+
special_chars = {
524+
'\\': r'\\',
525+
'\t': r'\t',
526+
'\r': r'\r',
527+
'\n': r'\n',
528+
'\"': r'\"',
529+
}
530+
531+
def tearDownClass():
532+
# Reset the global 'escapes' dict to the default
533+
make_escapes(pass_nonascii=True)
534+
535+
def test_special_chars(self):
536+
# special_chars are always escaped regardless of the
537+
# --escape option
538+
for pass_nonascii in (True, False):
539+
make_escapes(pass_nonascii=pass_nonascii)
540+
with self.subTest(pass_nonascii=pass_nonascii):
541+
for char in self.special_chars:
542+
self.assertEqual(pygettext.escape(char, encoding='utf-8'),
543+
self.special_chars[char])
544+
545+
def _char_to_octal_escape(self, char):
546+
"""Convert a character to its octal escape representation."""
547+
return r"\%03o" % ord(char)
548+
549+
def _octal_escape_to_string(self, escaped):
550+
"""Convert an octal escape representation to string."""
551+
octal_escapes = re.findall(r'\\([0-7]{3})', escaped)
552+
bytestr = bytes([int(n, 8) for n in octal_escapes])
553+
return bytestr.decode('utf-8')
554+
555+
def test_not_escaped(self):
556+
"""
557+
Test escaping when --escape is not used.
558+
559+
When --escape is not used, only some characters within the ASCII
560+
range are escaped. Characters >= 128 are not escaped.
561+
"""
562+
# This is the same as invoking pygettext without
563+
# the --escape option (the default behavior).
564+
make_escapes(pass_nonascii=True)
565+
# The encoding option is not used when --escape is not passed
566+
encoding = 'foo'
567+
568+
# First 32 characters use octal escapes (except for special chars)
569+
for i in range(32):
570+
char = chr(i)
571+
if char in self.special_chars:
572+
continue
573+
self.assertEqual(pygettext.escape(char, encoding=encoding),
574+
self._char_to_octal_escape(char))
575+
576+
# Characters 32-126 are not escaped (except for special chars)
577+
for i in range(32, 127):
578+
char = chr(i)
579+
if char in self.special_chars:
580+
continue
581+
self.assertEqual(pygettext.escape(char, encoding=encoding), char)
582+
583+
# chr(127) uses octal escape
584+
self.assertEqual(pygettext.escape(chr(127), encoding=encoding),
585+
'\\177')
586+
587+
# All characters >= 128 are not escaped
588+
for i in range(128, 256):
589+
char = chr(i)
590+
self.assertEqual(pygettext.escape(char, encoding=encoding), char)
591+
592+
593+
def test_escaped(self):
594+
"""
595+
Test escaping when --escape is used.
596+
597+
When --escape is used, every character outside the 32..126 range is escaped, including characters >= 128.
598+
"""
599+
make_escapes(pass_nonascii=False)
600+
encoding = 'utf-8'
601+
602+
# First 32 characters use octal escapes (except for special chars)
603+
for i in range(32):
604+
char = chr(i)
605+
if char in self.special_chars:
606+
continue
607+
self.assertEqual(pygettext.escape(char, encoding=encoding),
608+
self._char_to_octal_escape(char))
609+
610+
# Characters 32-126 are not escaped (except for special chars)
611+
for i in range(32, 127):
612+
char = chr(i)
613+
if char in self.special_chars:
614+
continue
615+
self.assertEqual(pygettext.escape(char, encoding=encoding), char)
616+
617+
# Characters >= 127 are escaped
618+
for i in range(127, 256):
619+
char = chr(i)
620+
escaped = pygettext.escape(char, encoding=encoding)
621+
decoded_char = self._octal_escape_to_string(escaped)
622+
self.assertEqual(char, decoded_char)
623+
624+
520625
def extract_from_snapshots():
521626
snapshots = {
522627
'messages.py': (),
@@ -526,6 +631,8 @@ def extract_from_snapshots():
526631
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
527632
'--keyword=pfoo:1c,2',
528633
'--keyword=npfoo:1c,2,3', '--keyword=_:1,2'),
634+
# Test escaping non-ASCII characters
635+
'escapes.py': ('--escape',),
529636
}
530637

531638
for filename, args in snapshots.items():

Tools/i18n/pygettext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ def make_escapes(pass_nonascii):
188188
global escapes, escape
189189
if pass_nonascii:
190190
# Allow non-ascii characters to pass through so that e.g. 'msgid
191-
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
191+
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
192192
# escape any character outside the 32..126 range.
193193
mod = 128
194194
escape = escape_ascii

0 commit comments

Comments
 (0)