Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#: custom_keywords.py:9 custom_keywords.py:10
msgid "bar"
msgstr ""

#: custom_keywords.py:12
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:13
msgid "dog"
msgid_plural "dogs"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:15
msgctxt "context"
msgid "bar"
msgstr ""

#: custom_keywords.py:17
msgctxt "context"
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

30 changes: 30 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from gettext import (
gettext as foo,
ngettext as nfoo,
pgettext as pfoo,
npgettext as npfoo,
gettext as bar,
)

foo('bar')
foo('bar', 'baz')

nfoo('cat', 'cats', 1)
nfoo('dog', 'dogs')

pfoo('context', 'bar')

npfoo('context', 'cat', 'cats', 1)

# This is an unknown keyword and should be ignored
bar('baz')

# 'nfoo' requires at least 2 arguments
nfoo('dog')

# 'pfoo' requires at least 2 arguments
pfoo('context')

# 'npfoo' requires at least 3 arguments
npfoo('context')
npfoo('context', 'cat')
80 changes: 63 additions & 17 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir
from test.support.os_helper import temp_cwd, temp_dir


Expand All @@ -17,6 +17,10 @@
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'


with imports_under_tool("i18n"):
from pygettext import parse_spec


def normalize_POT_file(pot):
"""Normalize the POT creation timestamp, charset and
file locations to make the POT file easier to compare.
Expand Down Expand Up @@ -377,16 +381,8 @@ class _(object):

def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
output_file = input_file.with_suffix('.pot')
with self.subTest(input_file=f'i18n_data/{input_file}'):
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')

for input_file, output_file, output in extract_from_snapshots():
with self.subTest(input_file=input_file):
expected = output_file.read_text(encoding='utf-8')
self.assert_POT_equal(expected, output)

Expand Down Expand Up @@ -485,17 +481,67 @@ def test_comments_not_extracted_without_tags(self):
'''), raw=True)
self.assertNotIn('#.', data)


def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
def test_parse_keyword_spec(self):
valid = (
('foo', ('foo', {0: 'msgid'})),
('foo:1', ('foo', {0: 'msgid'})),
('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
)
for spec, expected in valid:
with self.subTest(spec=spec):
self.assertEqual(parse_spec(spec), expected)

invalid = (
('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"),
('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"),
('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"),
('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"),
('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"),
('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"),
('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"),
('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"),
('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"),
)
for spec, message in invalid:
with self.subTest(spec=spec):
with self.assertRaises(ValueError) as cm:
parse_spec(spec)
self.assertEqual(str(cm.exception), message)


def extract_from_snapshots():
snapshots = {
'messages.py': (),
'fileloc.py': ('--docstrings',),
'docstrings.py': ('--docstrings',),
'comments.py': ('--add-comments=i18n:',),
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3'),
}

for filename, args in snapshots.items():
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
assert_python_ok('-Xutf8', Test_pygettext.script, *args,
input_file.name)
yield (input_file, output_file,
Path('messages.pot').read_text(encoding='utf-8'))


def update_POT_snapshots():
for _, output_file, output in extract_from_snapshots():
output = normalize_POT_file(output)
output_file.write_text(output, encoding='utf-8')

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend support for specifying custom keywords in :program:`pygettext`.
87 changes: 86 additions & 1 deletion Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,88 @@ def getFilesForName(name):
}


def parse_spec(spec):
"""Parse a keyword spec string into a dictionary.

The keyword spec format defines the name of the gettext function and the
positions of the arguments that correspond to msgid, msgid_plural, and
msgctxt. The format is as follows:

name - the name of the gettext function, assumed to
have a single argument that is the msgid.
name:pos1 - the name of the gettext function and the position
of the msgid argument.
name:pos1,pos2 - the name of the gettext function and the positions
of the msgid and msgid_plural arguments.
name:pos1,pos2c - the name of the gettext function and the positions
of the msgid and msgctxt arguments.
name:pos1,pos2,pos3c - the name of the gettext function and the
positions of the msgid, msgid_plural, and
msgctxt arguments.

As an example, the spec 'foo:1,2,3c' means that the function foo has three
arguments, the first one is the msgid, the second one is the msgid_plural,
and the third one is the msgctxt. The positions are 1-based.

The msgctxt argument can appear in any position, but it can only appear
once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are
equivalent.

See https://www.gnu.org/software/gettext/manual/gettext.html
for more information.
"""
parts = spec.strip().split(':', 1)
if len(parts) == 1:
name = parts[0]
return name, {0: 'msgid'}

name, args = parts
if not args:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'missing argument positions')

result = {}
for arg in args.split(','):
arg = arg.strip()
is_context = False
if arg.endswith('c'):
is_context = True
arg = arg[:-1]

try:
pos = int(arg) - 1
except ValueError as e:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'position is not an integer') from e

if pos < 0:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'argument positions must be strictly positive')

if pos in result:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'duplicate positions')

if is_context:
if 'msgctxt' in result.values():
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt can only appear once')
result[pos] = 'msgctxt'
elif 'msgid' not in result.values():
result[pos] = 'msgid'
elif 'msgid_plural' not in result.values():
result[pos] = 'msgid_plural'
else:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'too many positions')

if 'msgid' not in result.values() and 'msgctxt' in result.values():
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt cannot appear without msgid')

return name, result


@dataclass(frozen=True)
class Location:
filename: str
Expand Down Expand Up @@ -646,7 +728,10 @@ class Options:
make_escapes(not options.escape)

# calculate all keywords
options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
try:
options.keywords = dict(parse_spec(spec) for spec in options.keywords)
except ValueError as e:
raise SystemExit(e)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other errors cause print() + sys.exit(1).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, though I believe raise SystemExit(s) is functionally equivalent to print(..., file=sys.stderr) + sys.exit(1) and since it's shorter and the intent is clearer, I thought I'd start using that instead.

Though if you prefer to be consistent, I can change it to print+sys.exit?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it just for consistency (also, sys.exit() allows to set different return codes, but this is not used here).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated in 18d29cb to use print+sys.exit

if not no_default_keywords:
options.keywords |= DEFAULTKEYWORDS

Expand Down
Loading