diff --git a/Lib/gettext.py b/Lib/gettext.py index 6c11ab2b1eb570..5fbfae30573de2 100644 --- a/Lib/gettext.py +++ b/Lib/gettext.py @@ -43,9 +43,13 @@ # you'll need to study the GNU gettext code to do this. +import ast import operator import os import sys +from functools import cache +from string.templatelib import Interpolation, Template, convert +from typing import Any __all__ = ['NullTranslations', 'GNUTranslations', 'Catalog', @@ -290,11 +294,23 @@ def add_fallback(self, fallback): def gettext(self, message): if self._fallback: return self._fallback.gettext(message) + if isinstance(message, Template): + message, values = _template_to_format(message) + return message.format(**values) return message def ngettext(self, msgid1, msgid2, n): if self._fallback: return self._fallback.ngettext(msgid1, msgid2, n) + msgid1_is_template = isinstance(msgid1, Template) + msgid2_is_template = isinstance(msgid2, Template) + if msgid1_is_template and msgid2_is_template: + message, values = _template_to_format( + msgid1 if n == 1 else msgid2 + ) + return message.format(**values) + elif msgid1_is_template or msgid2_is_template: + raise TypeError('msgids cannot mix strings and t-strings') n = _as_int2(n) if n == 1: return msgid1 @@ -304,11 +320,23 @@ def ngettext(self, msgid1, msgid2, n): def pgettext(self, context, message): if self._fallback: return self._fallback.pgettext(context, message) + if isinstance(message, Template): + message, values = _template_to_format(message) + return message.format(**values) return message def npgettext(self, context, msgid1, msgid2, n): if self._fallback: return self._fallback.npgettext(context, msgid1, msgid2, n) + msgid1_is_template = isinstance(msgid1, Template) + msgid2_is_template = isinstance(msgid2, Template) + if msgid1_is_template and msgid2_is_template: + message, values = _template_to_format( + msgid1 if n == 1 else msgid2 + ) + return message.format(**values) + elif msgid1_is_template or msgid2_is_template: + raise TypeError('msgids 
cannot mix strings and t-strings') n = _as_int2(n) if n == 1: return msgid1 @@ -437,50 +465,104 @@ def _parse(self, fp): def gettext(self, message): missing = object() + orig_message = message + t_values = None + if isinstance(message, Template): + message, t_values = _template_to_format(message) tmsg = self._catalog.get(message, missing) if tmsg is missing: tmsg = self._catalog.get((message, self.plural(1)), missing) if tmsg is not missing: + if t_values is not None: + return tmsg.format(**t_values) return tmsg if self._fallback: - return self._fallback.gettext(message) + return self._fallback.gettext(orig_message) + if t_values is not None: + return message.format(**t_values) return message def ngettext(self, msgid1, msgid2, n): + orig_msgid1 = msgid1 + orig_msgid2 = msgid2 + msgid1_is_template = isinstance(msgid1, Template) + msgid2_is_template = isinstance(msgid2, Template) + t_values1 = t_values2 = None + if msgid1_is_template and msgid2_is_template: + msgid1, t_values1 = _template_to_format(msgid1) + msgid2, t_values2 = _template_to_format(msgid2) + elif msgid1_is_template or msgid2_is_template: + raise TypeError('msgids cannot mix strings and t-strings') + plural = self.plural(n) + t_values = t_values2 if plural else t_values1 try: - tmsg = self._catalog[(msgid1, self.plural(n))] + tmsg = self._catalog[(msgid1, plural)] except KeyError: if self._fallback: - return self._fallback.ngettext(msgid1, msgid2, n) + return self._fallback.ngettext(orig_msgid1, orig_msgid2, n) if n == 1: - tmsg = msgid1 + if t_values1 is not None: + return msgid1.format(**t_values1) + return msgid1 else: - tmsg = msgid2 + if t_values2 is not None: + return msgid2.format(**t_values2) + return msgid2 + if t_values is not None: + return tmsg.format(**t_values) return tmsg def pgettext(self, context, message): + orig_message = message + t_values = None + if isinstance(message, Template): + message, t_values = _template_to_format(message) ctxt_msg_id = self.CONTEXT % (context, message) 
missing = object() tmsg = self._catalog.get(ctxt_msg_id, missing) if tmsg is missing: tmsg = self._catalog.get((ctxt_msg_id, self.plural(1)), missing) if tmsg is not missing: + if t_values is not None: + return tmsg.format(**t_values) return tmsg if self._fallback: - return self._fallback.pgettext(context, message) + return self._fallback.pgettext(context, orig_message) + if t_values is not None: + return message.format(**t_values) return message def npgettext(self, context, msgid1, msgid2, n): + orig_msgid1 = msgid1 + orig_msgid2 = msgid2 + msgid1_is_template = isinstance(msgid1, Template) + msgid2_is_template = isinstance(msgid2, Template) + t_values1 = t_values2 = None + if msgid1_is_template and msgid2_is_template: + msgid1, t_values1 = _template_to_format(msgid1) + msgid2, t_values2 = _template_to_format(msgid2) + elif msgid1_is_template or msgid2_is_template: + raise TypeError('msgids cannot mix strings and t-strings') + plural = self.plural(n) + t_values = t_values2 if plural else t_values1 ctxt_msg_id = self.CONTEXT % (context, msgid1) try: - tmsg = self._catalog[ctxt_msg_id, self.plural(n)] + tmsg = self._catalog[ctxt_msg_id, plural] except KeyError: if self._fallback: - return self._fallback.npgettext(context, msgid1, msgid2, n) + return self._fallback.npgettext( + context, orig_msgid1, orig_msgid2, n + ) if n == 1: - tmsg = msgid1 + if t_values1 is not None: + return msgid1.format(**t_values1) + return msgid1 else: - tmsg = msgid2 + if t_values2 is not None: + return msgid2.format(**t_values2) + return msgid2 + if t_values is not None: + return tmsg.format(**t_values) return tmsg @@ -655,3 +737,144 @@ def npgettext(context, msgid1, msgid2, n): # gettext. 
Catalog = translation + + +# utils for t-string handling in gettext translation + pygettext extraction +# TBD where they should go, and whether this should be a public API or internal, +# especially the part about generating names from interpolations which is IMHO +# beneficial to have in stdlib so any implementation can re-use it without +# risking diverging behavior for the same expression between implementations + +class _NameTooComplexError(ValueError): + """ + Raised when an expression is too complex to derive a format string name + from it, or the resulting name would not be valid in a format string. + """ + + +class _ExtractNamesVisitor(ast.NodeVisitor): + def __init__(self): + self._name_parts = [] + + @property + def name(self) -> str: + name = '__'.join(self._name_parts) + if not name.isidentifier(): + raise _NameTooComplexError( + 'Only expressions which can be converted to a format string ' + 'placeholder may be used in a gettext call; assign the ' + 'expression to a variable and use that instead' + ) + return name + + def generic_visit(self, node): + name = node.__class__.__name__ + raise _NameTooComplexError( + f'Only simple expressions are supported, {name} is not allowed; ' + 'assign the expression to a variable and use that instead' + ) + + def visit_Attribute(self, node): + self.visit(node.value) + self._name_parts.append(node.attr) + + def visit_Name(self, node): + self._name_parts.append(node.id) + + def visit_Subscript(self, node): + self.visit(node.value) + if not isinstance(node.slice, ast.Constant): + raise _NameTooComplexError( + 'Only constant value dict keys may be used in a gettext call; ' + 'assign the expression to a variable and use that instead' + ) + self.visit(node.slice) + + def visit_Constant(self, node): + self._name_parts.append(str(node.value)) + + def visit_Call(self, node): + self.visit(node.func) + if node.args: + raise _NameTooComplexError( + 'Function calls with arguments are not supported in gettext ' + 'calls; assign 
the result to a variable and use that instead' + ) + + +def _template_node_to_format(node: ast.TemplateStr) -> str: + """Generate a format string from a template string AST node. + + This fails with a `_NameTooComplexError` in case the expression is not + suitable for conversion. + """ + parts = [] + interpolation_format_names = {} + for child in node.values: + match child: + case ast.Constant(value): + parts.append(value.replace('{', '{{').replace('}', '}}')) + case ast.Interpolation(value): + visitor = _ExtractNamesVisitor() + visitor.visit(value) + name = visitor.name + expr = ast.unparse(value) + if ( + existing_expr := interpolation_format_names.get(name) + ) and existing_expr != expr: + raise _NameTooComplexError( + f'Interpolations of {existing_expr} and {expr} cannot ' + 'be mixed in the same gettext call; assign one of ' + 'them to a variable and use that instead' + ) + interpolation_format_names[name] = expr + parts.append(f'{{{name}}}') + return ''.join(parts) + + +def _template_to_format(template: Template) -> tuple[str, dict[str, Any]]: + """Convert a template to a format string and its value dict. + + This takes a :class:`~string.templatelib.Template`, and converts all the + interpolations with format string placeholders derived from the original + expression. + + This fails with a `_NameTooComplexError` in case the expression is not + suitable for conversion. 
+ """ + parts = [] + interpolation_format_names = {} + values = {} + for item in template: + match item: + case str() as s: + parts.append(s.replace('{', '{{').replace('}', '}}')) + case Interpolation(value, expr, conversion, format_spec): + value = convert(value, conversion) + value = format(value, format_spec) + name = _expr_to_format_field_name(expr) + if ( + existing_expr := interpolation_format_names.get(name) + ) and existing_expr != expr: + raise _NameTooComplexError( + f'Interpolations of {existing_expr} and {expr} cannot ' + 'be mixed in the same gettext call; assign one of ' + 'them to a variable and use that instead' + ) + interpolation_format_names[name] = expr + values[name] = value + parts.append(f'{{{name}}}') + return ''.join(parts), values + + +@cache +def _expr_to_format_field_name(expr: str) -> str: + # handle simple cases w/o the overhead of dealing with an ast + if expr.isidentifier(): + return expr + if all(x.isidentifier() for x in expr.split('.')): + return '__'.join(expr.split('.')) + expr_node = ast.parse(expr, mode='eval').body + visitor = _ExtractNamesVisitor() + visitor.visit(expr_node) + return visitor.name diff --git a/Lib/test/test_gettext.py b/Lib/test/test_gettext.py index 9ad37909a8ec4e..0f72a0f4e4f120 100644 --- a/Lib/test/test_gettext.py +++ b/Lib/test/test_gettext.py @@ -38,6 +38,27 @@ bmsgd2luayAoaW4gIm15IG90aGVyIGNvbnRleHQiKQB3aW5rIHdpbmsA ''' +GNU_TSTRINGS_MO_DATA = b'''\ +3hIElQAAAAAJAAAAHAAAAGQAAAANAAAArAAAAAAAAADgAAAAGQAAAOEAAAASAAAA+wAAAA0AAAAO +AQAAPQAAABwBAABFAAAAWgEAABYAAACgAQAAFAAAALcBAAAWAAAAzAEAAB8BAADjAQAAGQAAAAMD +AAASAAAAHQMAAA0AAAAwAwAANgAAAD4DAAA+AAAAdQMAAAwAAAC0AwAAGwAAAMEDAAAWAAAA3QMA +AAEAAAAFAAAAAAAAAAYAAAAAAAAABwAAAAkAAAAEAAAACAAAAAIAAAAAAAAAAwAAAAAAAAAASGVs +bG8ge3VzZXJfX25hbWVfX3RpdGxlfQBIZWxsbyB7dXNlcl9fbmFtZX0ASGVsbG8ge3doZXJlfQBU +aGVyZSBpcyB7bn0gZ3JlZW4gc25lawBUaGVyZSBhcmUge259IGNvbG9yZnVsIHNuZWtzOiB7Y29s +b3J9AGVuZ3Jpc2gEVGhlcmUgaXMge259IGdyZWVuIHNuZWsAVGhlcmUgYXJlIHtufSBjb2xvcmZ1 
+bCBzbmVrczoge2NvbG9yfQBub3J0aGVybgRIZWxsbyB7d2hlcmV9AHt7YnJhY2V9fSBmb3Ige3do +YXR9AHt7e2JyYWNlfX19IGZvciB7d2hhdH0AUHJvamVjdC1JZC1WZXJzaW9uOiBQQUNLQUdFIFZF +UlNJT04KUE8tUmV2aXNpb24tRGF0ZTogWUVBUi1NTy1EQSBITzpNSStaT05FCkxhc3QtVHJhbnNs +YXRvcjogRlVMTCBOQU1FIDxFTUFJTEBBRERSRVNTPgpMYW5ndWFnZS1UZWFtOiBMQU5HVUFHRSA8 +TExAbGkub3JnPgpMYW5ndWFnZTogCk1JTUUtVmVyc2lvbjogMS4wCkNvbnRlbnQtVHlwZTogdGV4 +dC9wbGFpbjsgY2hhcnNldD1VVEYtOApDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nOiA4Yml0Ckdl +bmVyYXRlZC1CeTogcHlnZXR0ZXh0LnB5IDEuNQoASGFsbG8ge3VzZXJfX25hbWVfX3RpdGxlfQBI +YWxsbyB7dXNlcl9fbmFtZX0ASGFsbG8ge3doZXJlfQBEYSBpc3Qge259IGdlbGJlIFNuZWsARGEg +c2luZCB7bn0gYnVudGUgU25la3M6IHtjb2xvcn0ARGEgaXN0IHtufSBnZWxiZSBTY2huZWNrZQBE +YSBzaW5kIHtufSBidW50ZSBTY2huZWNrZW46IHtjb2xvcn0ATW9pbiB7d2hlcmV9AHt7YmVyZWl0 +bWFjaGVufX0gYXVmIHt3aGF0fQB7e3ticmFjZX19fSBhdWYge3doYXR9AA== +''' + # .mo file with an invalid magic number GNU_MO_DATA_BAD_MAGIC_NUMBER = base64.b64encode(b'ABCD') @@ -156,6 +177,7 @@ LOCALEDIR = os.path.join('xx', 'LC_MESSAGES') MOFILE = os.path.join(LOCALEDIR, 'gettext.mo') +TSTRINGS_MOFILE = os.path.join(LOCALEDIR, 'gettext_tstrings.mo') MOFILE_BAD_MAGIC_NUMBER = os.path.join(LOCALEDIR, 'gettext_bad_magic_number.mo') MOFILE_BAD_MAJOR_VERSION = os.path.join(LOCALEDIR, 'gettext_bad_major_version.mo') MOFILE_BAD_MINOR_VERSION = os.path.join(LOCALEDIR, 'gettext_bad_minor_version.mo') @@ -179,6 +201,8 @@ def setUpClass(cls): os.makedirs(LOCALEDIR) with open(MOFILE, 'wb') as fp: fp.write(base64.decodebytes(GNU_MO_DATA)) + with open(TSTRINGS_MOFILE, 'wb') as fp: + fp.write(base64.decodebytes(GNU_TSTRINGS_MO_DATA)) with open(MOFILE_BAD_MAGIC_NUMBER, 'wb') as fp: fp.write(base64.decodebytes(GNU_MO_DATA_BAD_MAGIC_NUMBER)) with open(MOFILE_BAD_MAJOR_VERSION, 'wb') as fp: @@ -727,6 +751,169 @@ def test_unicode_msgstr_with_context(self): eq(t, "Hay %s ficheros (context)") +class TemplateStringsTestsMixin: + def setup_tstrings_test(self, *, expect_translations=False): + # We use this weird naming of the 
gettext functions here to allow + # easy extraction of the .po file using pygettext; see the comment + # next to the po file content near the bottom of this file on how + # to regenerate it. + self.gettexT = self.t.gettext + self.ngettexT = self.t.ngettext + self.pgettexT = self.t.pgettext + self.npgettexT = self.t.npgettext + if expect_translations: + self.strings = { + '_': 'Hallo world', + '_unt': 'Hallo Scrooge', + '_un': 'Hallo scrooge', + '_undict': 'Hallo dOnAlD', + '_br1': '{bereitmachen} auf impact', + '_br2': '{}{} auf impact', + 'p': 'Moin world', + 'n1': 'Da ist 1 gelbe Snek', + 'n2': 'Da sind 42 bunte Sneks: rainbow', + 'np1': 'Da ist 1 gelbe Schnecke', + 'np2': 'Da sind 42 bunte Schnecken: rainbow', + } + else: + self.strings = { + '_': 'Hello world', + '_unt': 'Hello Scrooge', + '_un': 'Hello scrooge', + '_undict': 'Hello dOnAlD', + '_br1': '{brace} for impact', + '_br2': '{}{} for impact', + 'p': 'Hello world', + 'n1': 'There is 1 green snek', + 'n2': 'There are 42 colorful sneks: rainbow', + 'np1': 'There is 1 green snek', + 'np2': 'There are 42 colorful sneks: rainbow', + } + + def test_gettext(self): + eq = self.assertEqual + where = 'world' + class _User: + name = 'scrooge' + def __getitem__(self, key): + if key == 'name': + return 'dOnAlD' + user = _User() + eq(self.gettexT(t'Hello {where}'), self.strings['_']) + eq(self.gettexT(t'Hello {user.name.title()}'), self.strings['_unt']) + eq(self.gettexT(t'Hello {user.name}'), self.strings['_un']) + eq(self.gettexT(t'Hello {user["name"]}'), self.strings['_undict']) + + def test_gettext_braces(self): + eq = self.assertEqual + what = 'impact' + brace = '}{' + eq(self.gettexT(t'{{brace}} for {what}'), self.strings['_br1']) + eq(self.gettexT(t'{{{brace}}} for {what}'), self.strings['_br2']) + + def test_pgettext(self): + eq = self.assertEqual + where = 'world' + t = self.pgettexT('northern', t'Hello {where}') + eq(t, self.strings['p']) + + def test_ngettext(self): + eq = self.assertEqual + color = 
'rainbow' + n = 1 + t = self.ngettexT( + t'There is {n} green snek', + t'There are {n} colorful sneks: {color}', + n, + ) + eq(t, self.strings['n1']) + n = 42 + t = self.ngettexT( + t'There is {n} green snek', + t'There are {n} colorful sneks: {color}', + n, + ) + eq(t, self.strings['n2']) + + def test_npgettext(self): + eq = self.assertEqual + color = 'rainbow' + n = 1 + t = self.npgettexT( + 'engrish', + t'There is {n} green snek', + t'There are {n} colorful sneks: {color}', + n, + ) + eq(t, self.strings['np1']) + n = 42 + t = self.npgettexT( + 'engrish', + t'There is {n} green snek', + t'There are {n} colorful sneks: {color}', + n, + ) + eq(t, self.strings['np2']) + + +class TemplateStrNullTranslationsTest( + TemplateStringsTestsMixin, unittest.TestCase +): + """Test that NullTranslations works with t-strings.""" + def setUp(self): + self.t = gettext.NullTranslations() + self.setup_tstrings_test(expect_translations=False) + + +class TemplateStrGNUTranslationsTest( + TemplateStringsTestsMixin, GettextBaseTest +): + """Test that GNUTranslations works with t-strings. + + In this test case we have translations for all our strings. + """ + def setUp(self): + GettextBaseTest.setUp(self) + with open(TSTRINGS_MOFILE, 'rb') as fp: + self.t = gettext.GNUTranslations(fp) + self.setup_tstrings_test(expect_translations=True) + + +class TemplateStrGNUTranslationsMissingTest( + TemplateStringsTestsMixin, GettextBaseTest +): + """Test that GNUTranslations works with t-strings. + + In this test case there are no translations, so we expect the original + strings to be used. + """ + def setUp(self): + GettextBaseTest.setUp(self) + with open(TSTRINGS_MOFILE, 'rb') as fp: + self.t = gettext.GNUTranslations(fp) + self.t._catalog.clear() + self.setup_tstrings_test(expect_translations=False) + + +class TemplateStrGNUTranslationsFallbackTest( + TemplateStringsTestsMixin, GettextBaseTest +): + """Test that GNUTranslations works with t-strings and a fallback. 
+ + In this test case there are no translations, but we have a fallback to + a GNUTranslations object which has all the translations, so the translated + strings should be used. + """ + def setUp(self): + GettextBaseTest.setUp(self) + with open(TSTRINGS_MOFILE, 'rb') as fp: + self.t = gettext.GNUTranslations(fp) + self.t._catalog.clear() + fp.seek(0) + self.t.add_fallback(gettext.GNUTranslations(fp)) + self.setup_tstrings_test(expect_translations=True) + + class WeirdMetadataTest(GettextBaseTest): def setUp(self): GettextBaseTest.setUp(self) @@ -1084,3 +1271,65 @@ def test_translation_fallback(self): "#-#-#-#-# messages.po (EdX Studio) #-#-#-#-#\n" "Content-Type: text/plain; charset=UTF-8\n" ''' + +# Here's the .po file used to create the GNU_TSTRINGS_MO_DATA above. +# It was extracted using pygettext: +# ./Tools/i18n/pygettext.py -o /tmp/gettext_tstrings.pot -K -k gettexT \ +# -k ngettexT:1,2 -k pgettexT:1c,2 -k npgettexT:1c,2,3 Lib/test/test_gettext.py +# then updated w/ the translations using `msgmerge`: +# msgmerge -U /tmp/gettext_tstrings.po /tmp/gettext_tstrings.pot +# and then compiled using `msgfmt`: +# msgfmt /tmp/gettext_tstrings.po -o - | base64 + +rb''' +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2025-08-02 22:10+0200\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + +#: Lib/test/test_gettext.py:798 +msgid "Hello {where}" +msgstr "Hallo {where}" + +#: Lib/test/test_gettext.py:799 +msgid "Hello {user__name__title}" +msgstr "Hallo {user__name__title}" + +#: Lib/test/test_gettext.py:800 Lib/test/test_gettext.py:801 +msgid "Hello {user__name}" +msgstr "Hallo {user__name}" + +#: Lib/test/test_gettext.py:807 +msgid "{{brace}} for {what}" +msgstr "{{bereitmachen}} auf {what}" + +#: Lib/test/test_gettext.py:808 
+msgid "{{{brace}}} for {what}" +msgstr "{{{brace}}} auf {what}" + +#: Lib/test/test_gettext.py:813 +msgctxt "northern" +msgid "Hello {where}" +msgstr "Moin {where}" + +#: Lib/test/test_gettext.py:820 Lib/test/test_gettext.py:827 +msgid "There is {n} green snek" +msgid_plural "There are {n} colorful sneks: {color}" +msgstr[0] "Da ist {n} gelbe Snek" +msgstr[1] "Da sind {n} bunte Sneks: {color}" + +#: Lib/test/test_gettext.py:838 Lib/test/test_gettext.py:846 +msgctxt "engrish" +msgid "There is {n} green snek" +msgid_plural "There are {n} colorful sneks: {color}" +msgstr[0] "Da ist {n} gelbe Schnecke" +msgstr[1] "Da sind {n} bunte Schnecken: {color}" +''' diff --git a/Lib/test/test_tools/i18n_data/messages.pot b/Lib/test/test_tools/i18n_data/messages.pot index e8167acfc0742b..2a36f12ace5afa 100644 --- a/Lib/test/test_tools/i18n_data/messages.pot +++ b/Lib/test/test_tools/i18n_data/messages.pot @@ -97,3 +97,59 @@ msgid_plural "worlds" msgstr[0] "" msgstr[1] "" +#: messages.py:122 messages.py:123 +msgid "Hello World" +msgstr "" + +#: messages.py:124 +msgid "Hello {name}" +msgstr "" + +#: messages.py:125 +msgid "Hello {name__title}" +msgstr "" + +#: messages.py:126 messages.py:127 messages.py:128 +msgid "Hello {user__name}" +msgstr "" + +#: messages.py:129 +msgid "Hello {numbers__69}" +msgstr "" + +#: messages.py:132 +msgid "Hello {{escaped braces}}" +msgstr "" + +#: messages.py:133 +msgid "Hello {{{interpolated_braces}}} inside esacped braces" +msgstr "" + +#: messages.py:134 +msgid "}}Even{{ more {{braces}}" +msgstr "" + +#: messages.py:135 +msgid "}}Even{{ more {{{interpolated_braces}}}" +msgstr "" + +#: messages.py:139 +msgid "Weird {meow__False}" +msgstr "" + +#: messages.py:140 +msgid "Weird {meow__True}" +msgstr "" + +#: messages.py:141 +msgid "Weird {meow__69j}" +msgstr "" + +#: messages.py:142 +msgid "Weird {meow__Ellipsis}" +msgstr "" + +#: messages.py:143 +msgid "Weird {meow__None}" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/messages.py 
b/Lib/test/test_tools/i18n_data/messages.py index 9457bcb8611020..4f29d362f1c3a0 100644 --- a/Lib/test/test_tools/i18n_data/messages.py +++ b/Lib/test/test_tools/i18n_data/messages.py @@ -117,3 +117,42 @@ def _(x="don't extract me"): # f-strings f"Hello, {_('world')}!" f"Hello, {ngettext('world', 'worlds', 3)}!" + +# t-strings +_(t'Hello World') +_(t'Hello' t' World') +_(t'Hello {name}') +_(t'Hello {name.title()}') +_(t'Hello {user.name}') +_(t'Hello {user['name']}') +_(t'Hello {user["name"]}') +_(t'Hello {numbers[69]}') + +# t-strings - escaped braces +_(t'Hello {{escaped braces}}') +_(t'Hello {{{interpolated_braces}}} inside esacped braces') +_(t'}}Even{{ more {{braces}}') +_(t'}}Even{{ more {{{interpolated_braces}}}') + +# t-strings - slightly weird cases but simple enough to convert in a +# straightforward manner +_(t'Weird {meow[False]}') +_(t'Weird {meow[True]}') +_(t'Weird {meow[69j]}') +_(t'Weird {meow[...]}') +_(t'Weird {meow[None]}') + +# t-strings - invalid cases +_(t'Invalid {t"nesting"}') # nested tstrings are not allowed +_(t'Invalid {meow[meow()]}') # non-const subscript +_(t'Invalid {meow[kitty]}') # non-const subscript +_(t'Invalid {meow[()]}') # non-primitive subscript +_(t'Invalid {meow(42)}') # call with argument +_(t'Invalid {meow["foo:r"]}') # subscript that cannot be formatstringified +_(t'Invalid {meow[3.14]}') # subscript that cannot be formatstringified +_(t'Invalid {meow[...]} {meow.Ellipsis}') # same name for different expressions +_(t'Invalid {meow.loudly} {meow["loudly"]}') # same name for different expressions +_(t'Invalid {meow.loudly} {meow.loudly()}') # same name for different expressions +_(t'Invalid {3.14}') # format string is not a valid identifier +_(t'Invalid {42}') # format string is not a valid identifier +_(t'Invalid {69j}') # format string is not a valid identifier diff --git a/Misc/NEWS.d/next/Library/2025-08-03-21-21-27.gh-issue-137353.vjS-0H.rst b/Misc/NEWS.d/next/Library/2025-08-03-21-21-27.gh-issue-137353.vjS-0H.rst 
new file mode 100644 index 00000000000000..18905881c850a9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-03-21-21-27.gh-issue-137353.vjS-0H.rst @@ -0,0 +1 @@ +Support using t-strings in the :mod:`gettext` module. diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-08-03-21-21-56.gh-issue-137353.S_u1YW.rst b/Misc/NEWS.d/next/Tools-Demos/2025-08-03-21-21-56.gh-issue-137353.S_u1YW.rst new file mode 100644 index 00000000000000..8a3b403258dff3 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-08-03-21-21-56.gh-issue-137353.S_u1YW.rst @@ -0,0 +1 @@ +Support extracting t-strings in :program:`pygettext`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f46b05067d7fde..6c21f5e1bd44d3 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -149,6 +149,7 @@ import time import tokenize from dataclasses import dataclass, field +from gettext import _NameTooComplexError, _template_node_to_format from io import BytesIO from operator import itemgetter @@ -537,6 +538,12 @@ def _extract_message_with_spec(self, node, spec): msg_data = {} for arg_type, position in spec.items(): arg = node.args[position] + if self._is_template_str(arg): + try: + msg_data[arg_type] = _template_node_to_format(arg) + except _NameTooComplexError as exc: + return str(exc) + continue if not self._is_string_const(arg): return (f'Expected a string constant for argument ' f'{position + 1}, got {ast.unparse(arg)}') @@ -626,6 +633,9 @@ def _get_func_name(self, node): def _is_string_const(self, node): return isinstance(node, ast.Constant) and isinstance(node.value, str) + def _is_template_str(self, node): + return isinstance(node, ast.TemplateStr) + def write_pot_file(messages, options, fp): timestamp = time.strftime('%Y-%m-%d %H:%M%z') encoding = fp.encoding if fp.encoding else 'UTF-8'