Skip to content

Commit 8994130

Browse files
committed
Do not extract messages from function definitions.
Fixes a bug where pygettext would attempt to extract a message from code like this: `def _(x): pass`. This happens because pygettext only looks at one token at a time, so `_(x)` looks like a function call. However, since `x` is not a string literal, pygettext would erroneously issue a warning. This commit fixes that by keeping track of the previous token and checking whether it is `def` or `class`.
1 parent a83472f commit 8994130

File tree

2 files changed

+44
-12
lines changed

2 files changed

+44
-12
lines changed

Lib/test/test_tools/test_i18n.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,26 @@ def assert_POT_equal(self, expected, actual):
8787
self.maxDiff = None
8888
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))
8989

90-
def extract_docstrings_from_str(self, module_content):
91-
""" utility: return all msgids extracted from module_content """
92-
filename = 'test_docstrings.py'
93-
with temp_cwd(None) as cwd:
90+
def extract_from_str(self, module_content, *, args=(), strict=True):
91+
filename = 'test.py'
92+
with temp_cwd(None):
9493
with open(filename, 'w', encoding='utf-8') as fp:
9594
fp.write(module_content)
96-
assert_python_ok('-Xutf8', self.script, '-D', filename)
95+
res = assert_python_ok('-Xutf8', self.script, *args, filename)
96+
if strict:
97+
self.assertEqual(res.err, b'')
9798
with open('messages.pot', encoding='utf-8') as fp:
9899
data = fp.read()
99100
return self.get_msgids(data)
100101

102+
def extract_docstrings_from_str(self, module_content):
103+
"""Return all docstrings extracted from module_content."""
104+
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)
105+
106+
def extract_messages_from_str(self, module_content):
107+
"""Return all msgids extracted from module_content."""
108+
return self.extract_from_str(module_content)
109+
101110
def test_header(self):
102111
"""Make sure the required fields are in the header, according to:
103112
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -344,6 +353,23 @@ def test_calls_in_fstring_with_partially_wrong_expression(self):
344353
self.assertNotIn('foo', msgids)
345354
self.assertIn('bar', msgids)
346355

356+
def test_function_and_class_names(self):
357+
"""Test that function and class names are not mistakenly extracted."""
358+
msgids = self.extract_messages_from_str(dedent('''\
359+
def _(x):
360+
pass
361+
362+
def _(x="foo"):
363+
pass
364+
365+
async def _(x):
366+
pass
367+
368+
class _(object):
369+
pass
370+
'''))
371+
self.assertEqual(msgids, [''])
372+
347373
def test_pygettext_output(self):
348374
"""Test that the pygettext output exactly matches snapshots."""
349375
for input_file in DATA_DIR.glob('*.py'):

Tools/i18n/pygettext.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Minimally patched to make it even more xgettext compatible
66
# by Peter Funk <[email protected]>
77
#
8-
# 2002-11-22 Jürgen Hermann <[email protected]>
8+
# 2002-11-22 Jürgen Hermann <[email protected]>
99
# Added checks that _() only contains string literals, and
1010
# command line args are resolved to module lists, i.e. you
1111
# can now pass a filename, a module or package name, or a
@@ -207,7 +207,7 @@ def make_escapes(pass_nonascii):
207207
global escapes, escape
208208
if pass_nonascii:
209209
# Allow non-ascii characters to pass through so that e.g. 'msgid
210-
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
210+
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
211211
# escape any character outside the 32..126 range.
212212
mod = 128
213213
escape = escape_ascii
@@ -306,6 +306,11 @@ def getFilesForName(name):
306306
return []
307307

308308

309+
def _is_def_or_class_keyword(token):
310+
ttype, tstring, *_ = token
311+
return ttype == tokenize.NAME and tstring in ('def', 'class')
312+
313+
309314
class TokenEater:
310315
def __init__(self, options):
311316
self.__options = options
@@ -316,13 +321,11 @@ def __init__(self, options):
316321
self.__freshmodule = 1
317322
self.__curfile = None
318323
self.__enclosurecount = 0
324+
self.__prev_token = None
319325

320326
def __call__(self, ttype, tstring, stup, etup, line):
321-
# dispatch
322-
## import token
323-
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
324-
## file=sys.stderr)
325327
self.__state(ttype, tstring, stup[0])
328+
self.__prev_token = (ttype, tstring, stup, etup, line)
326329

327330
def __waiting(self, ttype, tstring, lineno):
328331
opts = self.__options
@@ -341,7 +344,10 @@ def __waiting(self, ttype, tstring, lineno):
341344
if ttype == tokenize.NAME and tstring in ('class', 'def'):
342345
self.__state = self.__suiteseen
343346
return
344-
if ttype == tokenize.NAME and tstring in opts.keywords:
347+
if (
348+
ttype == tokenize.NAME and tstring in opts.keywords
349+
and (not self.__prev_token or not _is_def_or_class_keyword(self.__prev_token))
350+
):
345351
self.__state = self.__keywordseen
346352
return
347353
if ttype == tokenize.STRING:

0 commit comments

Comments
 (0)