Skip to content

Commit 7dd4ff3

Browse files
committed
Add a script to produce translation information:
- Extract a list of translation contributors from the commit history. - Provide a list of translators including the locales (languages) to which they contributed. - Provide a list of translators and the total translation information (number of strings translated, total number of strings, and percent completion) for each locale (language).
1 parent 8b67732 commit 7dd4ff3

File tree

1 file changed

+272
-0
lines changed

1 file changed

+272
-0
lines changed

translators.py

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
#!/usr/bin/env python3
2+
"""Python script used to provide a list of translation contributors and
3+
translation completion information for each locale (language).
4+
"""
5+
# Version 1.0
6+
# Copyright (C) 2025 Bob Swift
7+
8+
import os
9+
import re
10+
import subprocess
11+
12+
from conf import locale_dirs
13+
14+
15+
# Debug switch. When True, every line of the git log output and the list of
# authors found for each checked file are printed. The output is very long
# and is best redirected to a log file for review.
DEBUG = False

# Alternate names mapped to the canonical contributor name, so that nobody is
# credited more than once under different names.
ALIASES = dict(
    bob='Bob Swift',
    phw='Philipp Wolfer',
    rdswift='Bob Swift',
)

# Lower-case contributor names that are never credited.
IGNORE = {'anonymous', 'hosted weblate', 'languages add-on', 'weblate'}

# Per-author whitelist of locale prefixes. Some authors appear on commits for
# every translation file even though they did not translate that locale
# (language); for those authors only the locales listed here are credited.
OK_LANGUAGES = {
    'Bob Swift': {'en', 'fr'},
    'Philipp Wolfer': {'de'},
}

# Email domains whose contributors are left out of the translator credits.
BAD_DOMAINS = {'hostux.ninja'}

# Locale code: the directory directly above "LC_MESSAGES" in a path.
RE_LANGUAGE = re.compile(r'^.*/(?P<language>[^/]+)/LC_MESSAGES')
# One line of git log output: author email and author name separated by "¤".
RE_GITLOG = re.compile(r'^(?P<email>[^¤]*)¤(?P<name>.*)$')
# Team title from a .po "Language-Team" header line, up to "<" or a backslash.
RE_TEAM = re.compile(r'^"Language-Team: (?P<team>[^<\\]*)')
50+
51+
52+
######################################################################################
53+
54+
def get_domain(email: str) -> str:
    """Extract the domain portion of an email address.

    The domain is taken as the text after the last "@" and is returned in
    lower case with surrounding whitespace removed, so that it can be compared
    directly against lower-case domain lists regardless of how the address
    was typed.

    Args:
        email (str): Email address to process.

    Returns:
        str: Lower-case domain portion of the address if it exists, otherwise an empty string.
    """
    # rpartition yields an empty separator when the address contains no "@".
    _local_part, separator, domain = email.rpartition('@')
    return domain.strip().lower() if separator else ''
64+
65+
66+
######################################################################################
67+
68+
def extract_authors_from_gitlog(path: str, debug: bool = False) -> set:
    """Read the git commit log and extract the list of authors for a file.

    Runs ``git log`` for the file, maps each author to a canonical name using
    ALIASES, and drops authors whose name is in IGNORE or whose email domain
    is in BAD_DOMAINS. If git exits with a non-zero status the result is an
    empty set.

    Args:
        path (str): Path of the file to check.
        debug (bool, optional): Print debug information while processing. Defaults to False.

    Returns:
        set: Set containing the authors for the file.

    Raises:
        subprocess.TimeoutExpired: If git does not finish within 30 seconds.
    """
    authors = set()
    cmd = ['git', 'log', r'--pretty=format:%aE¤%aN', r'--', path]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=30, check=False)
    if result.returncode == 0:
        # Replace undecodable bytes rather than failing on a single malformed
        # author name somewhere in the history.
        output = result.stdout.decode('utf-8', errors='replace')
        for line in output.split("\n"):
            if debug:
                print(f"Checking: {line}")
            matched = RE_GITLOG.search(line)
            if not matched:
                continue
            # Strip stray whitespace (such as a trailing carriage return) so
            # the same person is not credited twice.
            author = matched.group('name').strip()
            email = matched.group('email')
            # Get standard name for the author if there is an alias.
            for candidate in (f"{author} <{email}>", email, author):
                if candidate in ALIASES:
                    author = ALIASES[candidate]
                    break
            # A blank name cannot be credited and cannot be sorted by last name.
            if not author:
                continue
            # Only add author if name and email domain are not blocked.
            if author.lower() not in IGNORE and get_domain(email) not in BAD_DOMAINS:
                authors.add(author)
    if debug:
        print(f"Authors: {', '.join(sorted(authors))}")
    return authors
101+
102+
103+
######################################################################################
104+
105+
def _collect_po_string(lines: list, line_num: int, text: str) -> tuple:
    """Append the quoted continuation lines that start at line_num to text.

    Returns the combined text and the index of the first line not consumed.
    """
    line_count = len(lines)
    while line_num < line_count and lines[line_num].startswith('"'):
        # Drop the surrounding double quotes of the continuation line.
        text += lines[line_num].strip()[1:-1]
        line_num += 1
    return text, line_num


def _msgstr_value_start(line: str) -> int:
    """Return the index just after the opening quote of a msgstr value, or -1.

    Both the simple form (msgstr "...") and the plural form (msgstr[N] "...")
    are recognized.
    """
    if line.startswith('msgstr "'):
        return 8
    if line.startswith('msgstr['):
        marker = line.find('] "')
        if marker != -1:
            return marker + 3
    return -1


def get_translation_counts(file: str, translation_counts: tuple, language: str, language_titles: dict) -> tuple:
    """Update the counts of total strings to translate and translated strings
    with counts from the specified file. Also updates the language title dictionary.

    An entry is counted when its msgid is not empty, and it is counted as
    translated when the first msgstr that follows it is not empty. For plural
    entries that first msgstr is the first msgstr[N] line (normally msgstr[0]).

    Args:
        file (str): Translation file to process.
        translation_counts (tuple): Starting value of (total translation strings, number of translated strings) for the language.
        language (str): Localization (language) code for the file.
        language_titles (dict): Language titles dictionary to update.

    Returns:
        tuple: Updated tuple of (total translation strings, number of translated strings) for the language.
    """
    total, translated = translation_counts
    # Read the translation file into a list to allow looking ahead at continuation lines.
    with open(file, 'r', encoding='utf8') as f:
        lines = f.readlines()

    # True while a counted msgid is still waiting for its msgstr.
    awaiting_translation = False
    line_count = len(lines)
    line_num = 0
    while line_num < line_count:
        line = lines[line_num].strip()
        line_num += 1

        # Ignore blank lines
        if not line:
            continue

        # Get the locale (language) title if it isn't already set.
        if line.startswith('"Language-Team:') and not language_titles.get(language):
            matches = RE_TEAM.match(line)
            title = matches.group('team').strip() if matches else ''
            if title and title.lower() != 'none':
                language_titles[language] = title

        # Get the translation key.
        if line.startswith('msgid "'):
            text, line_num = _collect_po_string(lines, line_num, line[7:-1])
            # Only update count if the translation key is not empty.
            awaiting_translation = bool(text.strip())
            if awaiting_translation:
                total += 1
            continue

        if not awaiting_translation:
            continue

        # Get the translation value.
        value_start = _msgstr_value_start(line)
        if value_start < 0:
            continue
        text, line_num = _collect_po_string(lines, line_num, line[value_start:-1])
        # Only update count if the translation value is not empty.
        if text.strip():
            translated += 1
        # Later plural forms of the same entry must not be counted again.
        awaiting_translation = False

    return total, translated
167+
168+
169+
######################################################################################
170+
171+
def get_po_files():
    """Gets the translation files to process.

    Walks every directory listed in locale_dirs and reports each "*.po" file
    found in a directory whose path contains "<locale>/LC_MESSAGES".

    Yields:
        Iterable: Iterable of tuples: (locale, file path).
    """
    for base_path in locale_dirs:
        for _path, _dirs, _files in os.walk(base_path):
            # RE_LANGUAGE only recognizes "/" separators, so normalize the
            # native separator before matching. Without this no locale is
            # ever detected on platforms that use "\" in paths.
            matches = RE_LANGUAGE.search(_path.replace(os.sep, '/'))
            if not matches:
                continue
            language = matches.group('language')
            for _file in _files:
                # Only include translation *.po files.
                if not _file.endswith('.po'):
                    continue
                # The yielded path keeps the native separators.
                yield language, os.path.join(_path, _file)
190+
191+
192+
######################################################################################
193+
194+
def main() -> None:
    """Main processing method. Reviews the git history for all translation files and
    prints:

    1) A list of all translators and the locales they translated; and
    2) A list of the locales that have translators, each with its title, its
       translation progress, and the translators contributing to that locale.
    """
    translators = {}        # author -> set of locales credited to the author
    languages = {}          # locale -> set of authors credited for the locale
    completion = {}         # locale -> (total strings, translated strings)
    language_titles = {}    # locale -> title taken from the translation files

    for language, filepath in get_po_files():
        # Progress line, padded or cut to 79 columns and overwritten in place.
        text = f"Processing: {filepath}{' ' * 79}"[:79]
        print(f"{text}", end='\r', flush=True)

        language_titles.setdefault(language, '')

        # Tuple of (total translation strings, number of translated strings)
        completion[language] = get_translation_counts(
            filepath, completion.get(language, (0, 0)),
            language, language_titles)

        # Example of only displaying debug output for a single locale.
        # authors = extract_authors_from_gitlog(filepath, debug=language == 'pt_BR')
        authors = extract_authors_from_gitlog(filepath, debug=DEBUG)
        for author in authors:
            # Don't add authors that have language restrictions
            if author in OK_LANGUAGES and not any(language.startswith(i) for i in OK_LANGUAGES[author]):
                continue

            translators.setdefault(author, set()).add(language)
            languages.setdefault(language, set()).add(author)

    # Clear the progress line.
    print(' ' * 79, end='\r', flush=True)

    def _name_sorter(key: str) -> str:
        """Parses the name to return a sort key based on last name.

        Args:
            key (str): Name to parse.

        Returns:
            str: Lower case name in the format "last name, first name", or an
            empty string if the name contains no words.
        """
        parts = str(key).lower().split()
        if not parts:
            # A blank name has no last name; sort it first instead of failing.
            return ''
        return f"{parts[-1]}{(', ' + ' '.join(parts[:-1])) if len(parts) > 1 else ''}"

    print("\nTranslators:")
    for author in sorted(translators, key=_name_sorter):
        print(f"  - {author} ({', '.join(sorted(translators[author]))})")

    print(f"\n{'-' * 79}")

    for language in sorted(languages):
        total, translated = completion[language]
        # A locale whose files contain no countable strings is reported as 0.0%
        # instead of dividing by zero.
        completed = round(100 * translated / total, 1) if total else 0.0
        print(
            f"\nLanguage: [{language}] "
            f"{language_titles.get(language) or 'Unknown Language'}\n"
            f"Progress: Translated {translated:,} of {total:,} strings ({completed}%)"
        )
        for author in sorted(languages[language], key=_name_sorter):
            print(f"  - {author}")
267+
268+
269+
######################################################################################
270+
271+
# Produce the report only when this file is executed as a script.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)