36 changes: 31 additions & 5 deletions Lib/test/test_tools/test_i18n.py
@@ -87,17 +87,26 @@ def assert_POT_equal(self, expected, actual):
self.maxDiff = None
self.assertEqual(normalize_POT_file(expected), normalize_POT_file(actual))

def extract_docstrings_from_str(self, module_content):
""" utility: return all msgids extracted from module_content """
filename = 'test_docstrings.py'
with temp_cwd(None) as cwd:
def extract_from_str(self, module_content, *, args=(), strict=True):
filename = 'test.py'
with temp_cwd(None):
with open(filename, 'w', encoding='utf-8') as fp:
fp.write(module_content)
assert_python_ok('-Xutf8', self.script, '-D', filename)
res = assert_python_ok('-Xutf8', self.script, *args, filename)
if strict:
self.assertEqual(res.err, b'')
with open('messages.pot', encoding='utf-8') as fp:
data = fp.read()
return self.get_msgids(data)

def extract_docstrings_from_str(self, module_content):
"""Return all docstrings extracted from module_content."""
return self.extract_from_str(module_content, args=('--docstrings',), strict=False)

def extract_messages_from_str(self, module_content):
"""Return all msgids extracted from module_content."""
return self.extract_from_str(module_content)

def test_header(self):
"""Make sure the required fields are in the header, according to:
http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry
@@ -344,6 +353,23 @@ def test_calls_in_fstring_with_partially_wrong_expression(self):
self.assertNotIn('foo', msgids)
self.assertIn('bar', msgids)

def test_function_and_class_names(self):
"""Test that function and class names are not mistakenly extracted."""
msgids = self.extract_messages_from_str(dedent('''\
def _(x):
pass

def _(x="foo"):
pass

async def _(x):
pass

class _(object):
pass
'''))
self.assertEqual(msgids, [''])

def test_pygettext_output(self):
"""Test that the pygettext output exactly matches snapshots."""
for input_file in DATA_DIR.glob('*.py'):
@@ -0,0 +1,2 @@
Fix extraction warnings in :program:`pygettext.py` caused by mistaking
function definitions for function calls.
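
A hypothetical illustration of the bug (input invented for this note, not taken from the PR): before this change, a module that merely defines a function named after a gettext keyword, such as

    def _(x):
        pass

made pygettext treat the name _ as the start of a call; after seeing _ and ( it expected a string literal, so the parameter name produced a spurious warning along the lines of *** test.py:1: Seen unexpected token "x". With this fix, a keyword name that directly follows def or class is ignored.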
21 changes: 13 additions & 8 deletions Tools/i18n/pygettext.py
@@ -1,11 +1,10 @@
#! /usr/bin/env python3
# -*- coding: iso-8859-1 -*-
Member Author

There's no other file that uses this encoding, so I think it's safe (and more practical) to use utf-8.

Member

This is not a related change, so please keep the coding cookie.

Member Author

Got it! I'll revert :) Would you accept a separate (perhaps not backported) PR that removes the coding cookie and the commented-out code, or do you think it's not worth it?

Member

I'll accept it if there are pygettext tests for files with non-UTF-8 encoding.

Member Author

Fair enough, I'll add it to my todo list :)

# Originally written by Barry Warsaw <[email protected]>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <[email protected]>
#
# 2002-11-22 Jürgen Hermann <[email protected]>
# Added checks that _() only contains string literals, and
# command line args are resolved to module lists, i.e. you
# can now pass a filename, a module or package name, or a
@@ -207,7 +206,7 @@ def make_escapes(pass_nonascii):
global escapes, escape
if pass_nonascii:
# Allow non-ascii characters to pass through so that e.g. 'msgid
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
escape = escape_ascii
@@ -306,6 +305,11 @@ def getFilesForName(name):
return []


def _is_def_or_class_keyword(token):
ttype, tstring, *_ = token
return ttype == tokenize.NAME and tstring in ('def', 'class')
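
A quick sanity check of the helper (an illustrative sketch, not part of the PR): tokenize yields 5-field TokenInfo tuples, so the first two fields unpack as the token type and string, and keywords such as def are reported as plain NAME tokens.

    import io
    import tokenize

    src = 'def _(x):\n    pass\n'
    tokens = tokenize.generate_tokens(io.StringIO(src).readline)
    # Only the 'def' keyword is flagged; the function name '_' is not.
    print([tok.string for tok in tokens if _is_def_or_class_keyword(tok)])
    # -> ['def']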


class TokenEater:
def __init__(self, options):
self.__options = options
@@ -316,13 +320,11 @@ def __init__(self, options):
self.__freshmodule = 1
self.__curfile = None
self.__enclosurecount = 0
self.__prev_token = None

def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
## file=sys.stderr)
self.__state(ttype, tstring, stup[0])
self.__prev_token = (ttype, tstring, stup, etup, line)

def __waiting(self, ttype, tstring, lineno):
opts = self.__options
@@ -341,7 +343,10 @@ def __waiting(self, ttype, tstring, lineno):
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in opts.keywords:
if (
ttype == tokenize.NAME and tstring in opts.keywords
and (not self.__prev_token or not _is_def_or_class_keyword(self.__prev_token))
):
Member Author

The new logic is: if we see one of the gettext keywords and the previous token is not def or class, only then do we transition to __keywordseen.
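
For instance, given this hypothetical input:

    def _(x):
        pass

    x = _("bar")

the first _ directly follows the def token and is now skipped, while the _ in the assignment follows =, so it still transitions to __keywordseen and "bar" is extracted as before.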

self.__state = self.__keywordseen
return
if ttype == tokenize.STRING: