Skip to content
103 changes: 103 additions & 0 deletions Lib/test/test_tools/i18n_data/comments.pot
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2000-01-01 00:00+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.5\n"


#: comments.py:4
msgid "foo"
msgstr ""

#. i18n: This is a translator comment
#: comments.py:7
msgid "bar"
msgstr ""

#. i18n: This is a translator comment
#. i18n: This is another translator comment
#: comments.py:11
msgid "baz"
msgstr ""

#. i18n: This is a translator comment
#. with multiple
#. lines
#: comments.py:16
msgid "qux"
msgstr ""

#. i18n: This is a translator comment
#: comments.py:21
msgid "quux"
msgstr ""

#. i18n: This is a translator comment
#. with multiple lines
#. i18n: This is another translator comment
#. with multiple lines
#: comments.py:27
msgid "corge"
msgstr ""

#: comments.py:31
msgid "grault"
msgstr ""

#. i18n: This is another translator comment
#: comments.py:36
msgid "garply"
msgstr ""

#: comments.py:40
msgid "george"
msgstr ""

#. i18n: This is another translator comment
#: comments.py:45
msgid "waldo"
msgstr ""

#. i18n: This is a translator comment
#. i18n: This is also a translator comment
#. i18n: This is another translator comment
#: comments.py:50
msgid "waldo2"
msgstr ""

#. i18n: This is a translator comment
#. i18n: This is another translator comment
#. i18n: This is yet another translator comment
#. i18n: This is a translator comment
#. with multiple lines
#: comments.py:53 comments.py:56 comments.py:59 comments.py:63
msgid "fred"
msgstr ""

#: comments.py:65
msgid "plugh"
msgstr ""

#: comments.py:67
msgid "foobar"
msgstr ""

#. i18n: This is a translator comment
#: comments.py:71
msgid "xyzzy"
msgstr ""

#: comments.py:72
msgid "thud"
msgstr ""

72 changes: 72 additions & 0 deletions Lib/test/test_tools/i18n_data/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from gettext import gettext as _

# Not a translator comment
_('foo')

# i18n: This is a translator comment
_('bar')

# i18n: This is a translator comment
# i18n: This is another translator comment
_('baz')

# i18n: This is a translator comment
# with multiple
# lines
_('qux')

# This comment should not be included because
# it does not start with the prefix
# i18n: This is a translator comment
_('quux')

# i18n: This is a translator comment
# with multiple lines
# i18n: This is another translator comment
# with multiple lines
_('corge')

# i18n: This comment should be ignored

_('grault')

# i18n: This comment should be ignored

# i18n: This is another translator comment
_('garply')

# i18n: comment should be ignored
x = 1
_('george')

# i18n: This comment should be ignored
x = 1
# i18n: This is another translator comment
_('waldo')

# i18n: This is a translator comment
x = 1 # i18n: This is also a translator comment
# i18n: This is another translator comment
_('waldo2')

# i18n: This is a translator comment
_('fred')

# i18n: This is another translator comment
_('fred')

# i18n: This is yet another translator comment
_('fred')

# i18n: This is a translator comment
# with multiple lines
_('fred')

_('plugh') # i18n: This comment should be ignored

_('foo' # i18n: This comment should be ignored
'bar') # i18n: This comment should be ignored

# i18n: This is a translator comment
_('xyzzy')
_('thud')
6 changes: 4 additions & 2 deletions Lib/test/test_tools/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ def test_pygettext_output(self):
contents = input_file.read_text(encoding='utf-8')
with temp_cwd(None):
Path(input_file.name).write_text(contents)
assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name)
assert_python_ok('-Xutf8', self.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')

expected = output_file.read_text(encoding='utf-8')
Expand Down Expand Up @@ -438,7 +439,8 @@ def update_POT_snapshots():
contents = input_file.read_bytes()
with temp_cwd(None):
Path(input_file.name).write_bytes(contents)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name)
assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings',
'--add-comments=i18n:', input_file.name)
output = Path('messages.pot').read_text(encoding='utf-8')

output = normalize_POT_file(output)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add support for translator comments in :program:`pygettext.py`.
100 changes: 86 additions & 14 deletions Tools/i18n/pygettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
--extract-all
Extract all strings.

-cTAG
--add-comments=TAG
Extract translator comments. A comment block must start with TAG and
must directly precede the gettext call, with no comment-free line in
between. Multiple -cTAG options are allowed; in that case, a comment
starting with any of the TAGs is extracted.

-d name
--default-domain=name
Rename the default output file from messages.pot to name.pot.
Expand Down Expand Up @@ -140,7 +146,9 @@
import os
import sys
import time
import tokenize
from dataclasses import dataclass, field
from io import BytesIO
from operator import itemgetter

__version__ = '1.5'
Expand Down Expand Up @@ -301,12 +309,28 @@ class Message:
msgctxt: str | None
locations: set[Location] = field(default_factory=set)
is_docstring: bool = False
comments: list[str] = field(default_factory=list)

def add_location(self, filename, lineno, msgid_plural=None, *,
                 is_docstring=False, comments=None):
    """Record another occurrence of this message.

    Adds ``Location(filename, lineno)`` to the set of locations, fills
    in *msgid_plural* if no plural form has been recorded yet, and
    marks the message as a docstring if *is_docstring* is true for any
    occurrence.

    *comments* is an optional list of translator comments found at this
    occurrence; they are appended to the comments already collected, in
    order and without de-duplication.
    """
    # The first occurrence that supplies a plural form wins.
    if self.msgid_plural is None:
        self.msgid_plural = msgid_plural
    self.locations.add(Location(filename, lineno))
    self.is_docstring |= is_docstring
    if comments:
        self.comments.extend(comments)


def get_source_comments(source):
    """
    Return a dictionary mapping line numbers to
    comments in the source code.

    *source* is the module source as bytes.  Keys are 1-based line
    numbers; values are the comment text with the leading '#' markers
    and the surrounding whitespace removed.  Comments that trail code
    on the same line are included as well.
    """
    comments = {}
    for token in tokenize.tokenize(BytesIO(source).readline):
        if token.type != tokenize.COMMENT:
            continue
        text = token.string.strip()
        # Be permissive like xgettext: drop every leading '#', even when
        # the markers are separated by whitespace ('## tag', '# # tag',
        # '## #    #  tag'), so the TAG prefix can still be matched.
        while text.startswith('#'):
            text = text[1:].lstrip()
        comments[token.start[0]] = text
    return comments


class GettextVisitor(ast.NodeVisitor):
Expand All @@ -315,10 +339,17 @@ def __init__(self, options):
self.options = options
self.filename = None
self.messages = {}
self.comments = {}

def visit_file(self, source, filename):
    """Parse *source* and visit the resulting module tree.

    *source* is the raw content of the file named *filename*.  A file
    that cannot be parsed (SyntaxError) is skipped silently and leaves
    the visitor state untouched.
    """
    try:
        module_tree = ast.parse(source)
    except SyntaxError:
        return

    self.filename = filename
    # Tokenizing is only needed to look up translator comments; skip it
    # when no comment tags were requested (comment extraction returns
    # early in that case and never consults self.comments).
    if self.options.comment_tags:
        self.comments = get_source_comments(source)
    self.visit(module_tree)

def visit_Module(self, node):
self._extract_docstring(node)
Expand Down Expand Up @@ -371,14 +402,51 @@ def _extract_message(self, node):
msg_data[arg_type] = arg.value

lineno = node.lineno
self._add_message(lineno, **msg_data)
comments = self._extract_comments(node)
self._add_message(lineno, **msg_data, comments=comments)

def _extract_comments(self, node):
"""Extract translator comments.

Translator comments must precede the gettext call and
start with one of the comment prefixes defined by
--add-comments=TAG. See the tests for examples.
"""
if not self.options.comment_tags:
return []

comments = []
lineno = node.lineno - 1
# Collect an unbroken sequence of comments starting from
# the line above the gettext call.
while lineno >= 1:
comment = self.comments.get(lineno)
if comment is None:
break
comments.append(comment)
lineno -= 1

# Find the first translator comment in the sequence and
# return all comments starting from that comment.
comments = comments[::-1]
first_index = next((i for i, comment in enumerate(comments)
if self._is_translator_comment(comment)), None)
if first_index is None:
return []
return comments[first_index:]

def _is_translator_comment(self, comment):
return comment.startswith(tuple(self.options.comment_tags))

def _add_message(
self, lineno, msgid, msgid_plural=None, msgctxt=None, *,
is_docstring=False):
is_docstring=False, comments=None):
if msgid in self.options.toexclude:
return

if not comments:
comments = []

key = self._key_for(msgid, msgctxt)
message = self.messages.get(key)
if message:
Expand All @@ -387,6 +455,7 @@ def _add_message(
lineno,
msgid_plural,
is_docstring=is_docstring,
comments=comments,
)
else:
self.messages[key] = Message(
Expand All @@ -395,6 +464,7 @@ def _add_message(
msgctxt=msgctxt,
locations={Location(self.filename, lineno)},
is_docstring=is_docstring,
comments=comments,
)

@staticmethod
Expand Down Expand Up @@ -434,6 +504,10 @@ def write_pot_file(messages, options, fp):

for key, locations in sorted_keys:
msg = messages[key]

for comment in msg.comments:
print(f'#. {comment}', file=fp)

if options.writelocations:
# location comments are different b/w Solaris and GNU:
if options.locationstyle == options.SOLARIS:
Expand Down Expand Up @@ -472,9 +546,9 @@ def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'ad:DEhk:Kno:p:S:Vvw:x:X:',
['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'ac:d:DEhk:Kno:p:S:Vvw:x:X:',
['extract-all', 'add-comments=', 'default-domain=', 'escape',
'help', 'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings',
Expand All @@ -500,6 +574,7 @@ class Options:
excludefilename = ''
docstrings = 0
nodocstrings = {}
comment_tags = set()

options = Options()
locations = {'gnu' : options.GNU,
Expand All @@ -512,6 +587,8 @@ class Options:
usage(0)
elif opt in ('-a', '--extract-all'):
options.extractall = 1
elif opt in ('-c', '--add-comments'):
options.comment_tags.add(arg)
elif opt in ('-d', '--default-domain'):
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
Expand Down Expand Up @@ -599,12 +676,7 @@ class Options:
with open(filename, 'rb') as fp:
source = fp.read()

try:
module_tree = ast.parse(source)
except SyntaxError:
continue

visitor.visit_file(module_tree, filename)
visitor.visit_file(source, filename)

# write the output
if options.outfile == '-':
Expand Down
Loading