#!/usr/bin/env python3
"""Python script used to provide a list of translation contributors and
translation completion information for each locale (language).
"""
# Version 1.0
# Copyright (C) 2025 Bob Swift

import os
import re
import subprocess
import sys

from conf import locale_dirs


# Set to True to print each line of the git log output and the list of
# authors for every file checked. The output is very long and is best
# redirected to a log file for review.
DEBUG = False

# Aliases used to avoid duplicate credits under different names. Keys are
# compared against "name <email>", the email address, and the name, in that
# order; the lookup is case-sensitive.
ALIASES = {
    'bob': 'Bob Swift',
    'phw': 'Philipp Wolfer',
    'rdswift': 'Bob Swift',
}

# Contributor names to ignore (lower case).
IGNORE = {
    'anonymous',
    'hosted weblate',
    'languages add-on',
    'weblate',
}

# Used to restrict the applicable languages for selected authors. This is
# necessary because some authors appear on commits for all translation files,
# even if they did not contribute a translation for a locale (language).
# A locale is accepted when its code starts with one of the listed prefixes.
OK_LANGUAGES = {
    'Bob Swift': {'en', 'fr'},
    'Philipp Wolfer': {'de'},
}

# Email domains to omit from the translator credits.
BAD_DOMAINS = {'hostux.ninja'}

# Regular expressions used:
#   RE_LANGUAGE - locale code from a ".../<locale>/LC_MESSAGES" directory path.
#   RE_GITLOG   - "<email>¤<name>" lines produced by the git log format string.
#   RE_TEAM     - language title from the "Language-Team:" header of a PO file.
RE_LANGUAGE = re.compile(r'^.*/(?P<language>[^/]+)/LC_MESSAGES')
RE_GITLOG = re.compile(r'^(?P<email>[^¤]*)¤(?P<name>.*)$')
RE_TEAM = re.compile(r'^"Language-Team: (?P<team>[^<\\]*)')


######################################################################################

def get_domain(email: str) -> str:
    """Extract the domain portion of an email address.

    The domain is everything after the last '@', because a quoted local part
    may itself contain '@' while a domain never can. The result is lower-cased
    since domain names are case-insensitive, so comparisons against a
    lower-case block list cannot be bypassed by capitalisation.

    Args:
        email (str): Email address to process.

    Returns:
        str: Lower-case domain portion of the address if it exists, otherwise
            an empty string.
    """
    _local, separator, domain = email.rpartition('@')
    # Without a separator there is no domain part at all.
    return domain.strip().lower() if separator else ''


| 66 | +###################################################################################### |
| 67 | + |
| 68 | +def extract_authors_from_gitlog(path: str, debug: bool = False) -> set: |
| 69 | + """Read the git commit log and extract the list of authors for a file. |
| 70 | +
|
| 71 | + Args: |
| 72 | + path (str): Path of the file to check. |
| 73 | + debug (bool, optional): Print debug information while processing. Defaults to False. |
| 74 | +
|
| 75 | + Returns: |
| 76 | + set: Set containing the authors for the file. |
| 77 | + """ |
| 78 | + |
| 79 | + authors = set() |
| 80 | + cmd = ['git', 'log', r'--pretty=format:%aE¤%aN', r'--', path] |
| 81 | + result = subprocess.run(cmd, stdout=subprocess.PIPE, timeout=30, check=False) |
| 82 | + if result.returncode == 0: |
| 83 | + for line in result.stdout.decode('utf-8').split("\n"): |
| 84 | + if debug: |
| 85 | + print(f"Checking: {line}") |
| 86 | + matched = RE_GITLOG.search(line) |
| 87 | + if matched: |
| 88 | + author = matched.group('name') |
| 89 | + email = matched.group('email') |
| 90 | + # Get standard name for the author if there is an alias. |
| 91 | + for c in (f"{author} <{email}>", email, author): |
| 92 | + if c in ALIASES: |
| 93 | + author = ALIASES[c] |
| 94 | + break |
| 95 | + # Only add author if name and email domain are not blocked. |
| 96 | + if author.lower() not in IGNORE and get_domain(email) not in BAD_DOMAINS: |
| 97 | + authors.add(author) |
| 98 | + if debug: |
| 99 | + print(f"Authors: {', '.join(sorted(authors))}") |
| 100 | + return authors |
| 101 | + |
| 102 | + |
######################################################################################

def _collect_po_string(first_line: str, lines: list, line_num: int) -> tuple:
    """Join the quoted text of a PO keyword line with its continuation lines.

    Args:
        first_line (str): Stripped keyword line, e.g. 'msgid "text"'.
        lines (list): All lines of the PO file.
        line_num (int): Index of the line following ``first_line``.

    Returns:
        tuple: (joined text without the surrounding quotes, index of the first
            line that is not a continuation line).
    """
    # Text sits between the first quote and the final character of the line.
    text = first_line[first_line.index('"') + 1:-1]
    while line_num < len(lines):
        continuation = str(lines[line_num]).strip()
        # A blank line or a line not starting with a quote ends the string.
        if not continuation.startswith('"'):
            break
        text += continuation[1:-1]
        line_num += 1
    return text, line_num


def get_translation_counts(file: str, translation_counts: tuple, language: str, language_titles: dict) -> tuple:
    """Update the counts of total strings to translate and translated strings
    with counts from the specified file. Also updates the language title dictionary.

    Entries with an empty translation key (such as the PO header) are not
    counted. For plural entries the first plural form (``msgstr[0]``) decides
    whether the entry is translated.

    Args:
        file (str): Translation file to process.
        translation_counts (tuple): Starting value of (total translation strings, number of translated strings) for the language.
        language (str): Localization (language) code for the file.
        language_titles (dict): Language titles dictionary to update. The entry for
            ``language`` is set from the "Language-Team" header if it is missing or empty.

    Returns:
        tuple: Updated tuple of (total translation strings, number of translated strings) for the language.
    """
    total, translated = translation_counts

    # Read the whole file so that continuation lines can be looked ahead.
    with open(file, 'r', encoding='utf8') as f:
        lines = f.readlines()

    # True while a counted msgid is waiting for its msgstr.
    awaiting_value = False
    line_count = len(lines)
    line_num = 0
    while line_num < line_count:
        line = str(lines[line_num]).strip()
        line_num += 1

        # Ignore blank lines
        if not line:
            continue

        # Get the locale (language) title if it isn't already set.
        if line.startswith('"Language-Team:') and not language_titles.get(language):
            matches = RE_TEAM.match(line)
            # The header may have no value after the colon, giving no match.
            team = matches.group('team').strip() if matches else ''
            if team and team.lower() != 'none':
                language_titles[language] = team
            continue

        if line.startswith('msgid "'):
            # Get the translation key, including continuation lines.
            text, line_num = _collect_po_string(line, lines, line_num)
            # Only update count if the translation key is not empty.
            awaiting_value = bool(text.strip())
            if awaiting_value:
                total += 1
        elif awaiting_value and line.startswith(('msgstr "', 'msgstr[0] "')):
            # Get the translation value, including continuation lines.
            text, line_num = _collect_po_string(line, lines, line_num)
            # Only update count if the translation value is not empty.
            if text.strip():
                translated += 1
            awaiting_value = False

    return total, translated


######################################################################################

def get_po_files():
    """Gets the translation files to process.

    Walks every directory listed in ``locale_dirs`` and reports the ``*.po``
    files found in or below a ``<locale>/LC_MESSAGES`` directory. Directories
    and files are visited in sorted order so the output is reproducible.

    Yields:
        Iterable: Iterable of tuples: (locale, file path).
    """
    for base_path in locale_dirs:
        for _path, _dirs, _files in os.walk(base_path):
            # Sorting in place makes os.walk descend in a stable order.
            _dirs.sort()
            # RE_LANGUAGE expects '/' separators, so normalise platform paths.
            matches = RE_LANGUAGE.search(_path.replace(os.sep, '/'))
            if not matches:
                continue
            language = matches.group('language')
            for _file in sorted(_files):
                # Only include translation *.po files.
                if _file.endswith('.po'):
                    yield language, os.path.join(_path, _file)


######################################################################################

def main() -> None:
    """Main processing method. Reviews the git history for all translation files and
    prints:

    1) A list of all translators and the locales they translated; and
    2) A list of the locales that have translations, and the translators contributing
       to that locale.
    """
    translators = {}        # author -> set of locale codes
    languages = {}          # locale code -> set of authors
    completion = {}         # locale code -> (total strings, translated strings)
    language_titles = {}    # locale code -> language title ('' until known)

    for language, filepath in get_po_files():
        # Single-line progress display, padded and cut to 79 columns.
        text = f"Processing: {filepath}{' ' * 79}"[:79]
        print(f"{text}", end='\r', flush=True)

        language_titles.setdefault(language, '')

        # Tuple of (total translation strings, number of translated strings)
        completion[language] = get_translation_counts(
            filepath, completion.get(language, (0, 0)), language, language_titles)

        # Example of only displaying debug output for a single locale.
        # authors = extract_authors_from_gitlog(filepath, debug=language == 'pt_BR')
        authors = extract_authors_from_gitlog(filepath, debug=DEBUG)
        for author in authors:
            # Don't add authors that have language restrictions
            allowed = OK_LANGUAGES.get(author)
            if allowed is not None and not any(language.startswith(i) for i in allowed):
                continue

            translators.setdefault(author, set()).add(language)
            languages.setdefault(language, set()).add(author)

    # Clear the progress line.
    print(' ' * 79, end='\r', flush=True)

    def _name_sorter(key: str) -> str:
        """Parses the name to return a sort key based on last name.

        Args:
            key (str): Name to parse.

        Returns:
            str: Lower case name in the format "last name, first name", or an
                empty string for a blank name.
        """
        parts = str(key).lower().split()
        if not parts:
            return ''
        return f"{parts[-1]}{(', ' + ' '.join(parts[:-1])) if len(parts) > 1 else ''}"

    print("\nTranslators:")
    for author in sorted(translators, key=_name_sorter):
        print(f"  - {author} ({', '.join(sorted(translators[author]))})")

    print(f"\n{'-' * 79}")

    for language in sorted(languages):
        total, translated = completion[language]
        # A locale whose files contain no countable strings reports 0%.
        completed = round(100 * translated / total, 1) if total else 0.0
        print(
            f"\nLanguage: [{language}] "
            f"{language_titles.get(language) or 'Unknown Language'}\n"
            f"Progress: Translated {translated:,} of {total:,} strings ({completed}%)"
        )
        for author in sorted(languages[language], key=_name_sorter):
            print(f"  - {author}")


######################################################################################

# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()