Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#: custom_keywords.py:9 custom_keywords.py:10
msgid "bar"
msgstr ""

#: custom_keywords.py:12
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:13
msgid "dog"
msgid_plural "dogs"
msgstr[0] ""
msgstr[1] ""

#: custom_keywords.py:15
msgctxt "context"
msgid "bar"
msgstr ""

#: custom_keywords.py:17
msgctxt "context"
msgid "cat"
msgid_plural "cats"
msgstr[0] ""
msgstr[1] ""

30 changes: 30 additions & 0 deletions Lib/test/test_tools/i18n_data/custom_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from gettext import (
gettext as foo,
ngettext as nfoo,
pgettext as pfoo,
npgettext as npfoo,
gettext as bar,
)

foo('bar')
foo('bar', 'baz')

nfoo('cat', 'cats', 1)
nfoo('dog', 'dogs')

pfoo('context', 'bar')

npfoo('context', 'cat', 'cats', 1)

# This is an unknown keyword and should be ignored
bar('baz')

# 'nfoo' requires at least 2 arguments
nfoo('dog')

# 'pfoo' requires at least 2 arguments
pfoo('context')

# 'npfoo' requires at least 3 arguments
npfoo('context')
npfoo('context', 'cat')
80 changes: 63 additions & 17 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path

from test.support.script_helper import assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir
from test.support.os_helper import temp_cwd, temp_dir


Expand All @@ -17,6 +17,10 @@
DATA_DIR = Path(__file__).resolve().parent / 'i18n_data'


with imports_under_tool("i18n"):
from pygettext import parse_spec


def normalize_POT_file(pot):
"""Normalize the POT creation timestamp, charset and
file locations to make the POT file easier to compare.
Expand Down Expand Up @@ -377,16 +381,8 @@ class _(object):

def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
output_file = input_file.with_suffix('.pot')
with self.subTest(input_file=f'i18n_data/{input_file}'):
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')

for input_file, output_file, output in extract_from_snapshots():
with self.subTest(input_file=input_file):
expected = output_file.read_text(encoding='utf-8')
self.assert_POT_equal(expected, output)

Expand Down Expand Up @@ -485,17 +481,67 @@ def test_comments_not_extracted_without_tags(self):
'''), raw=True)
self.assertNotIn('#.', data)


def update_POT_snapshots():
for input_file in DATA_DIR.glob('*.py'):
def test_parse_keyword_spec(self):
valid = (
('foo', ('foo', {0: 'msgid'})),
('foo:1', ('foo', {0: 'msgid'})),
('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})),
('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})),
('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})),
)
for spec, expected in valid:
with self.subTest(spec=spec):
self.assertEqual(parse_spec(spec), expected)

invalid = (
('foo:', "Invalid keyword spec 'foo:': missing argument positions"),
('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"),
('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"),
('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"),
('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"),
('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"),
('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"),
('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"),
('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"),
('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"),
)
for spec, message in invalid:
with self.subTest(spec=spec):
with self.assertRaises(ValueError) as cm:
parse_spec(spec)
self.assertEqual(str(cm.exception), message)


def extract_from_snapshots():
snapshots = {
'messages.py': (),
'fileloc.py': ('--docstrings',),
'docstrings.py': ('--docstrings',),
'comments.py': ('--add-comments=i18n:',),
'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2',
'--keyword=pfoo:1c,2',
'--keyword=npfoo:1c,2,3'),
}

for filename, args in snapshots.items():
input_file = DATA_DIR / filename
output_file = input_file.with_suffix('.pot')
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')
assert_python_ok('-Xutf8', Test_pygettext.script, *args,
input_file.name)
yield (input_file, output_file,
Path('messages.pot').read_text(encoding='utf-8'))


def update_POT_snapshots():
for _, output_file, output in extract_from_snapshots():
output = normalize_POT_file(output)
output_file.write_text(output, encoding='utf-8')

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend support for specifying custom keywords in :program:`pygettext`.
88 changes: 87 additions & 1 deletion Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,88 @@ def getFilesForName(name):
}


def parse_spec(spec):
"""Parse a keyword spec string into a dictionary.

The keyword spec format defines the name of the gettext function and the
positions of the arguments that correspond to msgid, msgid_plural, and
msgctxt. The format is as follows:

name - the name of the gettext function, assumed to
have a single argument that is the msgid.
name:pos1 - the name of the gettext function and the position
of the msgid argument.
name:pos1,pos2 - the name of the gettext function and the positions
of the msgid and msgid_plural arguments.
name:pos1,pos2c - the name of the gettext function and the positions
of the msgid and msgctxt arguments.
name:pos1,pos2,pos3c - the name of the gettext function and the
positions of the msgid, msgid_plural, and
msgctxt arguments.

As an example, the spec 'foo:1,2,3c' means that the function foo has three
arguments, the first one is the msgid, the second one is the msgid_plural,
and the third one is the msgctxt. The positions are 1-based.

The msgctxt argument can appear in any position, but it can only appear
once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are
equivalent.

See https://www.gnu.org/software/gettext/manual/gettext.html
for more information.
"""
parts = spec.strip().split(':', 1)
if len(parts) == 1:
name = parts[0]
return name, {0: 'msgid'}

name, args = parts
if not args:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'missing argument positions')

result = {}
for arg in args.split(','):
arg = arg.strip()
is_context = False
if arg.endswith('c'):
is_context = True
arg = arg[:-1]

try:
pos = int(arg) - 1
except ValueError as e:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'position is not an integer') from e

if pos < 0:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'argument positions must be strictly positive')

if pos in result.values():
raise ValueError(f'Invalid keyword spec {spec!r}: '
'duplicate positions')

if is_context:
if 'msgctxt' in result:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt can only appear once')
result['msgctxt'] = pos
elif 'msgid' not in result:
result['msgid'] = pos
elif 'msgid_plural' not in result:
result['msgid_plural'] = pos
else:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'too many positions')

if 'msgid' not in result and 'msgctxt' in result:
raise ValueError(f'Invalid keyword spec {spec!r}: '
'msgctxt cannot appear without msgid')

return name, {v: k for k, v in result.items()}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be simpler to build result in that form from the beginning?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did that in d861c84, let me know if you like it better like that

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was just a question. I am fine with both variants.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just wanted to let you see the difference :) I don't have a strong preference either, let's stick with the current version, then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I tried implementing some followup work on top of this PR (support for the t specifier, multiple keywords with the same funcname) and it's better to use the original representation because the diff in the followup PRs will be smaller.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I did the right thing by letting the PR lie down for two days. 😄

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, good call 🙂 And thanks for your super thorough reviews! It's really appreciated



@dataclass(frozen=True)
class Location:
filename: str
Expand Down Expand Up @@ -646,7 +728,11 @@ class Options:
make_escapes(not options.escape)

# calculate all keywords
options.keywords = {kw: {0: 'msgid'} for kw in options.keywords}
try:
options.keywords = dict(parse_spec(spec) for spec in options.keywords)
except ValueError as e:
print(e, file=sys.stderr)
sys.exit(1)
if not no_default_keywords:
options.keywords |= DEFAULTKEYWORDS

Expand Down
Loading