From 74b023508f6f62bd706ae393b677702f32838320 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 9 Feb 2025 17:38:34 +0100 Subject: [PATCH 01/10] Support custom keywords --- .../test_tools/i18n_data/custom_keywords.pot | 45 +++++++++ .../test_tools/i18n_data/custom_keywords.py | 30 ++++++ Lib/test/test_tools/test_i18n.py | 82 +++++++++++++---- Tools/i18n/pygettext.py | 92 ++++++++++++++++++- 4 files changed, 229 insertions(+), 20 deletions(-) create mode 100644 Lib/test/test_tools/i18n_data/custom_keywords.pot create mode 100644 Lib/test/test_tools/i18n_data/custom_keywords.py diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.pot b/Lib/test/test_tools/i18n_data/custom_keywords.pot new file mode 100644 index 00000000000000..48df2e7f579cc7 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/custom_keywords.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: custom_keywords.py:9 custom_keywords.py:10 +msgid "bar" +msgstr "" + +#: custom_keywords.py:12 +msgid "cat" +msgid_plural "cats" +msgstr[0] "" +msgstr[1] "" + +#: custom_keywords.py:13 +msgid "dog" +msgid_plural "dogs" +msgstr[0] "" +msgstr[1] "" + +#: custom_keywords.py:15 +msgctxt "context" +msgid "bar" +msgstr "" + +#: custom_keywords.py:17 +msgctxt "context" +msgid "cat" +msgid_plural "cats" +msgstr[0] "" +msgstr[1] "" + diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.py b/Lib/test/test_tools/i18n_data/custom_keywords.py new file mode 100644 index 00000000000000..8359fd96f3614b --- /dev/null +++ b/Lib/test/test_tools/i18n_data/custom_keywords.py @@ -0,0 +1,30 @@ +from gettext import ( + gettext as foo, + ngettext as nfoo, + pgettext as pfoo, + npgettext as npfoo, + gettext as bar, +) + +foo('bar') +foo('bar', 'baz') + +nfoo('cat', 'cats', 1) +nfoo('dog', 'dogs') + +pfoo('context', 'bar') + +npfoo('context', 'cat', 'cats', 1) + +# This is an unknown keyword and should be ignored +bar('baz') + +# 'nfoo' requires at least 2 arguments +nfoo('dog') + +# 'pfoo' requires at least 2 arguments +pfoo('context') + +# 'npfoo' requires at least 3 arguments +npfoo('context') +npfoo('context', 'cat') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index d97fdb116fcd19..c0c7cf7f04999c 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -8,7 +8,7 @@ from pathlib import Path from test.support.script_helper import assert_python_ok -from test.test_tools import skip_if_missing, toolsdir +from test.test_tools import imports_under_tool, skip_if_missing, toolsdir from test.support.os_helper import temp_cwd, temp_dir @@ -17,6 +17,10 @@ DATA_DIR = Path(__file__).resolve().parent / 'i18n_data' +with imports_under_tool("i18n"): + from pygettext import parse_spec + + def normalize_POT_file(pot): """Normalize the POT creation timestamp, charset and file locations to make the POT file easier to compare. @@ -377,16 +381,8 @@ class _(object): def test_pygettext_output(self): """Test that the pygettext output exactly matches snapshots.""" - for input_file in DATA_DIR.glob('*.py'): - output_file = input_file.with_suffix('.pot') - with self.subTest(input_file=f'i18n_data/{input_file}'): - contents = input_file.read_text(encoding='utf-8') - with temp_cwd(None): - Path(input_file.name).write_text(contents) - assert_python_ok('-Xutf8', self.script, '--docstrings', - '--add-comments=i18n:', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') - + for input_file, output_file, output in extract_from_snapshots(): + with self.subTest(input_file=input_file): expected = output_file.read_text(encoding='utf-8') self.assert_POT_equal(expected, output) @@ -485,17 +481,69 @@ def test_comments_not_extracted_without_tags(self): '''), raw=True) self.assertNotIn('#.', data) - -def update_POT_snapshots(): - for input_file in DATA_DIR.glob('*.py'): + def test_parse_keyword_spec(self): + valid = ( + ('foo', ('foo', {0: 'msgid'})), + ('foo:1', ('foo', {0: 'msgid'})), + ('foo:1,2', ('foo', {0: 'msgid', 1: 'msgid_plural'})), + ('foo:1, 2', ('foo', {0: 'msgid', 1: 'msgid_plural'})), + ('foo:1,2c', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:2c,1', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:2c ,1', ('foo', {0: 'msgid', 1: 'msgctxt'})), + ('foo:1,2,3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ('foo:1, 2, 3c', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ('foo:3c,1,2', ('foo', {0: 'msgid', 1: 'msgid_plural', 2: 'msgctxt'})), + ) + for spec, expected in valid: + with self.subTest(spec=spec): + self.assertEqual(parse_spec(spec), expected) + + invalid = ( + ('foo:', "Invalid keyword spec 'foo:': missing argument positions"), + ('foo:bar', "Invalid keyword spec 'foo:bar': position is not an integer"), + ('foo:0', "Invalid keyword spec 'foo:0': argument positions must be strictly positive"), + ('foo:-2', "Invalid keyword spec 'foo:-2': argument positions must be strictly positive"), + ('foo:1,1', "Invalid keyword spec 'foo:1,1': duplicate positions"), + ('foo:1,2,1', "Invalid keyword spec 'foo:1,2,1': duplicate positions"), + ('foo:1c,2,1c', "Invalid keyword spec 'foo:1c,2,1c': duplicate positions"), + ('foo:1c,2,3c', "Invalid keyword spec 'foo:1c,2,3c': msgctxt can only appear once"), + ('foo:1,2,3', "Invalid keyword spec 'foo:1,2,3': too many positions"), + ('foo:1c', "Invalid keyword spec 'foo:1c': msgctxt cannot appear without msgid"), + ) + for spec, message in invalid: + with self.subTest(spec=spec): + with self.assertRaises(ValueError) as cm: + parse_spec(spec) + self.assertEqual(str(cm.exception), message) + + +def extract_from_snapshots(): + snapshots = { + 'messages.py': (), + 'fileloc.py': ('--docstrings',), + 'docstrings.py': ('--docstrings',), + 'comments.py': ('--add-comments=i18n:',), + 'custom_keywords.py': ('--keyword=foo', '--keyword=nfoo:1,2', + '--keyword=pfoo:1c,2', + '--keyword=npfoo:1c,2,3'), + } + + for filename, args in snapshots.items(): + input_file = DATA_DIR / filename output_file = input_file.with_suffix('.pot') contents = input_file.read_bytes() with temp_cwd(None): Path(input_file.name).write_bytes(contents) - assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', - '--add-comments=i18n:', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') + assert_python_ok('-Xutf8', Test_pygettext.script, *args, + input_file.name) + yield ( + input_file, + output_file, Path('messages.pot').read_text(encoding='utf-8') + ) + +def update_POT_snapshots(): + for _, output_file, output in extract_from_snapshots(): output = normalize_POT_file(output) output_file.write_text(output, encoding='utf-8') diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 4681c84387958e..293cbcecc606f1 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -294,6 +294,89 @@ def getFilesForName(name): } +def parse_spec(spec): + """Parse a keyword spec string into a dictionary. + + The keyword spec format defines the name of the gettext function and the + positions of the arguments that correspond to msgid, msgid_plural, and + msgctxt. The format is as follows: + + name - the name of the gettext function, assumed to + have a single argument that is the msgid. + name:pos1 - the name of the gettext function and the position + of the msgid argument. + name:pos1,pos2 - the name of the gettext function and the positions + of the msgid and msgid_plural arguments. + name:pos1,pos2c - the name of the gettext function and the positions + of the msgid and msgctxt arguments. + name:pos1,pos2,pos3c - the name of the gettext function and the + positions of the msgid, msgid_plural, and + msgctxt arguments. + + As an example, the spec 'foo:1,2,3c' means that the function foo has three + arguments, the first one is the msgid, the second one is the msgid_plural, + and the third one is the msgctxt. The positions are 1-based. + + The msgctxt argument can appear in any position, but it can only appear + once. For example, the keyword specs 'foo:3c,1,2' and 'foo:1,2,3c' are + equivalent. + + See https://www.gnu.org/software/gettext/manual/gettext.html + for more information. + """ + parts = spec.strip().split(':', 1) + if len(parts) == 1: + name = parts[0] + return name, {0: 'msgid'} + + name, args = parts + if not args: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'missing argument positions') + + result = {} + for arg in args.split(','): + arg = arg.strip() + is_context = False + if arg.endswith('c'): + is_context = True + arg = arg[:-1] + + try: + pos = int(arg) - 1 + except ValueError as e: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'position is not an integer') from e + + if pos < 0: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'argument positions must be strictly positive') + + for k, v in result.items(): + if v == pos: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'duplicate positions') + + if is_context: + if 'msgctxt' in result: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'msgctxt can only appear once') + result['msgctxt'] = pos + elif 'msgid' not in result: + result['msgid'] = pos + elif 'msgid_plural' not in result: + result['msgid_plural'] = pos + else: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'too many positions') + + if 'msgid' not in result and 'msgctxt' in result: + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'msgctxt cannot appear without msgid') + + return name, {v: k for k, v in result.items()} + + @dataclass(frozen=True) class Location: filename: str @@ -568,7 +651,7 @@ class Options: # defaults extractall = 0 # FIXME: currently this option has no effect at all. escape = 0 - keywords = [] + keywords = set() outpath = '' outfile = 'messages.pot' writelocations = 1 @@ -602,7 +685,7 @@ class Options: elif opt in ('-D', '--docstrings'): options.docstrings = 1 elif opt in ('-k', '--keyword'): - options.keywords.append(arg) + options.keywords.add(arg) elif opt in ('-K', '--no-default-keywords'): no_default_keywords = True elif opt in ('-n', '--add-location'): @@ -646,7 +729,10 @@ class Options: make_escapes(not options.escape) # calculate all keywords - options.keywords = {kw: {0: 'msgid'} for kw in options.keywords} + try: + options.keywords = dict(parse_spec(spec) for spec in options.keywords) + except ValueError as e: + raise SystemExit(e) if not no_default_keywords: options.keywords |= DEFAULTKEYWORDS From 619cad51c124750522ff0dadf51c0c823c3bfa4f Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sat, 22 Feb 2025 18:08:48 +0100 Subject: [PATCH 02/10] Add news entry --- .../Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst b/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst new file mode 100644 index 00000000000000..cb7b3d4cbdc8e1 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-02-22-18-08-35.gh-issue-130453.njRXG8.rst @@ -0,0 +1 @@ +Extend support for specifying custom keywords in :program:`pygettext`. From 184232eb1a0c329e2dc7b5f93264fbb45fe365f1 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sat, 22 Feb 2025 18:11:37 +0100 Subject: [PATCH 03/10] PEP8 --- Lib/test/test_tools/test_i18n.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index c0c7cf7f04999c..d73fcff4c9cb11 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -536,10 +536,8 @@ def extract_from_snapshots(): Path(input_file.name).write_bytes(contents) assert_python_ok('-Xutf8', Test_pygettext.script, *args, input_file.name) - yield ( - input_file, - output_file, Path('messages.pot').read_text(encoding='utf-8') - ) + yield (input_file, output_file, + Path('messages.pot').read_text(encoding='utf-8')) def update_POT_snapshots(): From 4dd889bc596a4dc35e6fdb2fbee11a09539b9aa9 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sat, 22 Feb 2025 18:42:52 +0100 Subject: [PATCH 04/10] Remove whitespace --- Tools/i18n/pygettext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 293cbcecc606f1..ea67282bd8ebbf 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -296,7 +296,7 @@ def getFilesForName(name): def parse_spec(spec): """Parse a keyword spec string into a dictionary. - + The keyword spec format defines the name of the gettext function and the positions of the arguments that correspond to msgid, msgid_plural, and msgctxt. The format is as follows: From 6a46ce73a58a956564e7ed97e686fedb0f99ddfb Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sat, 22 Feb 2025 18:48:51 +0100 Subject: [PATCH 05/10] Remove trailing whitespace --- Lib/test/test_tools/i18n_data/custom_keywords.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_tools/i18n_data/custom_keywords.py b/Lib/test/test_tools/i18n_data/custom_keywords.py index 8359fd96f3614b..01ea56c348cb55 100644 --- a/Lib/test/test_tools/i18n_data/custom_keywords.py +++ b/Lib/test/test_tools/i18n_data/custom_keywords.py @@ -7,7 +7,7 @@ ) foo('bar') -foo('bar', 'baz') +foo('bar', 'baz') nfoo('cat', 'cats', 1) nfoo('dog', 'dogs') @@ -20,7 +20,7 @@ bar('baz') # 'nfoo' requires at least 2 arguments -nfoo('dog') +nfoo('dog') # 'pfoo' requires at least 2 arguments pfoo('context') From bb50cfe4f9081ccb323ed6d5b4eda30e79085888 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 23 Feb 2025 20:59:55 +0100 Subject: [PATCH 06/10] Simplify code --- Tools/i18n/pygettext.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index ea67282bd8ebbf..21ed2514a0227e 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -352,10 +352,9 @@ def parse_spec(spec): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'argument positions must be strictly positive') - for k, v in result.items(): - if v == pos: - raise ValueError(f'Invalid keyword spec {spec!r}: ' - 'duplicate positions') + if pos in result.values(): + raise ValueError(f'Invalid keyword spec {spec!r}: ' + 'duplicate positions') if is_context: if 'msgctxt' in result: From 83a21e06c0865b9335b5c6e95c0624d582ae54b3 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 23 Feb 2025 21:06:58 +0100 Subject: [PATCH 07/10] Revert making keywords a set --- Tools/i18n/pygettext.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 21ed2514a0227e..bb2bc97865e4e4 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -650,7 +650,7 @@ class Options: # defaults extractall = 0 # FIXME: currently this option has no effect at all. escape = 0 - keywords = set() + keywords = [] outpath = '' outfile = 'messages.pot' writelocations = 1 @@ -684,7 +684,7 @@ class Options: elif opt in ('-D', '--docstrings'): options.docstrings = 1 elif opt in ('-k', '--keyword'): - options.keywords.add(arg) + options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): no_default_keywords = True elif opt in ('-n', '--add-location'): From d861c84294871b301072d032857fbad00112201a Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 23 Feb 2025 21:17:06 +0100 Subject: [PATCH 08/10] Simplify 'parse_spec' --- Tools/i18n/pygettext.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index bb2bc97865e4e4..2fa0f91016ef30 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -352,28 +352,28 @@ def parse_spec(spec): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'argument positions must be strictly positive') - if pos in result.values(): + if pos in result: raise ValueError(f'Invalid keyword spec {spec!r}: ' 'duplicate positions') if is_context: - if 'msgctxt' in result: + if 'msgctxt' in result.values(): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'msgctxt can only appear once') - result['msgctxt'] = pos - elif 'msgid' not in result: - result['msgid'] = pos - elif 'msgid_plural' not in result: - result['msgid_plural'] = pos + result[pos] = 'msgctxt' + elif 'msgid' not in result.values(): + result[pos] = 'msgid' + elif 'msgid_plural' not in result.values(): + result[pos] = 'msgid_plural' else: raise ValueError(f'Invalid keyword spec {spec!r}: ' 'too many positions') - if 'msgid' not in result and 'msgctxt' in result: + if 'msgid' not in result.values() and 'msgctxt' in result.values(): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'msgctxt cannot appear without msgid') - return name, {v: k for k, v in result.items()} + return name, result @dataclass(frozen=True) From 18d29cb48f1026c6b6b14e71423b186e215dbd70 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Sun, 23 Feb 2025 21:37:16 +0100 Subject: [PATCH 09/10] Use print+sys.exit for consistency --- Tools/i18n/pygettext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 2fa0f91016ef30..cbd745b008a54b 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -731,7 +731,8 @@ class Options: try: options.keywords = dict(parse_spec(spec) for spec in options.keywords) except ValueError as e: - raise SystemExit(e) + print(e, file=sys.stderr) + sys.exit(1) if not no_default_keywords: options.keywords |= DEFAULTKEYWORDS From a3ef55bc629d77a0a673d94621d77dac61bb07b4 Mon Sep 17 00:00:00 2001 From: Tomas Roun Date: Mon, 24 Feb 2025 20:55:46 +0100 Subject: [PATCH 10/10] Revert changes to parse_spec --- Tools/i18n/pygettext.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index cbd745b008a54b..0f5f32c7d6c18f 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -352,28 +352,28 @@ def parse_spec(spec): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'argument positions must be strictly positive') - if pos in result: + if pos in result.values(): raise ValueError(f'Invalid keyword spec {spec!r}: ' 'duplicate positions') if is_context: - if 'msgctxt' in result.values(): + if 'msgctxt' in result: raise ValueError(f'Invalid keyword spec {spec!r}: ' 'msgctxt can only appear once') - result[pos] = 'msgctxt' - elif 'msgid' not in result.values(): - result[pos] = 'msgid' - elif 'msgid_plural' not in result.values(): - result[pos] = 'msgid_plural' + result['msgctxt'] = pos + elif 'msgid' not in result: + result['msgid'] = pos + elif 'msgid_plural' not in result: + result['msgid_plural'] = pos else: raise ValueError(f'Invalid keyword spec {spec!r}: ' 'too many positions') - if 'msgid' not in result.values() and 'msgctxt' in result.values(): + if 'msgid' not in result and 'msgctxt' in result: raise ValueError(f'Invalid keyword spec {spec!r}: ' 'msgctxt cannot appear without msgid') - return name, result + return name, {v: k for k, v in result.items()} @dataclass(frozen=True)