|
1 | 1 | #!/usr/bin/env python3
|
2 |
| -import polib |
3 |
| -import os |
| 2 | + |
| 3 | +import argparse |
4 | 4 | import re
|
5 |
| -import sys |
6 |
| - |
7 |
| - |
8 |
| -tags = set() |
9 |
| -ignore_tags = {'<1>', '<2>', '<3>', '<4>', '<f>', '<n>', |
10 |
| - '<q>', '<r>', '<s>', '<u>', '<more>', '<empty>'} |
11 |
| -pattern = re.compile(r'<[a-z/0-9_]+>') |
12 |
| - |
13 |
| - |
14 |
| -def init_tags(): |
15 |
| - global tags |
16 |
| - pofile = polib.pofile("./lang/po/cataclysm-dda.pot") |
17 |
| - for entry in pofile: |
18 |
| - for tag in set(pattern.findall(entry.msgid)): |
19 |
| - tags.add(tag) |
20 |
| - tags = tags - ignore_tags |
21 |
| - |
22 |
| - |
23 |
| -def extract_tags(msg): |
24 |
| - matches = pattern.findall(msg) |
25 |
| - result = set() |
26 |
| - for match in matches: |
27 |
| - if match in tags: |
28 |
| - result.add(match) |
29 |
| - return result |
30 |
| - |
31 |
| - |
32 |
| -def check_message(entry): |
33 |
| - msgid = entry.msgid |
34 |
| - msgstr = entry.msgstr |
35 |
| - if not msgstr: |
36 |
| - return set() |
37 |
| - tags_msgid = extract_tags(msgid) |
38 |
| - tags_msgstr = extract_tags(msgstr) |
39 |
| - return tags_msgid - tags_msgstr |
40 |
| - |
41 |
| - |
42 |
| -def check_po_file(file): |
43 |
| - pofile = polib.pofile(file) |
44 |
| - errors = 0 |
45 |
| - for entry in pofile.translated_entries(): |
46 |
| - missing_tags = check_message(entry) |
47 |
| - if missing_tags: |
48 |
| - print("Tag(s) {} missing in translation: {} => {}".format( |
49 |
| - missing_tags, |
50 |
| - entry.msgid.replace("\n", "\\n"), |
51 |
| - entry.msgstr.replace("\n", "\\n"))) |
52 |
| - errors += 1 |
53 |
| - return errors |
54 |
| - |
55 |
| - |
56 |
| -init_tags() |
57 |
| -po_files = [] |
58 |
| -for file in sorted(os.listdir("lang/po")): |
59 |
| - if file.endswith(".po") and not file.endswith("en.po"): |
60 |
| - po_files.append(file) |
61 |
| -files_to_check = [] |
62 |
| -if len(sys.argv) == 1: |
63 |
| - files_to_check = po_files |
64 |
| -else: |
65 |
| - for i in range(1, len(sys.argv)): |
66 |
| - if sys.argv[i] + ".po" in po_files: |
67 |
| - files_to_check.append(sys.argv[i] + ".po") |
| 5 | +import pathlib |
| 6 | + |
| 7 | +import polib |
| 8 | + |
| 9 | + |
# Optional checks; both are off unless enabled by the __main__ section.
CHECK_TAGS = False
CHECK_ELLIPSES = False

# <tag> markup: lowercase letters, digits, "_" and "/" inside angle brackets
PATTERN_TAG = re.compile(r"<[a-z/0-9_]+>")
# printf conversion: "%", optional digits / "$" / ".", then one type letter
PATTERN_PRINTF = re.compile(r"%[0-9$.]*[diufegxscp]")

PO_PATH = pathlib.Path("lang/po/")
# Maps the file stem (locale name) to the path of its .po file,
# e.g. { "ar": Path("lang/po/ar.po"), ... }
PO_FILES = {po.stem: po for po in sorted(PO_PATH.glob("*.po"))}


# tags that can be safely omitted in translation
TAGS_OPTIONAL = (
    "<zombie>", "<zombies>",
    "<name_b>", "<name_g>",
    "<freaking>", "<the_cataclysm>",
    "<granny_name_g>",
    "<mypronoun>", "<mypossesivepronoun>",
)

# known false-positive strings. FIXME?
SKIP_STRINGS = (
    "<empty>",
    "Even if you climb down safely, you will fall "
    "<color_yellow>at least %d story</color>.",
    "Pointed in your direction, the %s emits an IFF warning beep.",
    "%s points in your direction and emits an IFF warning beep.",
    "You hear a warning beep.",
)
| 39 | + |
| 40 | + |
| 41 | +def _f(input_list): |
| 42 | + """Formats list""" |
| 43 | + """ ['a', 'b'] => `a b` """ |
| 44 | + |
| 45 | + if not input_list: |
| 46 | + return "none" |
| 47 | + if type(input_list) is str: |
| 48 | + return f"`{input_list}`" |
| 49 | + return f"`{' '.join(input_list)}`" |
| 50 | + |
| 51 | + |
| 52 | +########################## |
| 53 | +# ELLIPSES CHECK # |
| 54 | +########################## |
| 55 | + |
| 56 | + |
def check_ellipses(entry):
    """Warn when a translation spells the ellipsis as three plain dots.

    Only entries whose source string (entry.msgid) contains the `…` symbol
    are examined. The translation is entry.msgstr when it is non-empty,
    otherwise every value of entry.msgstr_plural.

    Returns a list holding one warning message, or an empty list.
    """
    if "…" not in entry.msgid:
        return []

    if entry.msgstr:
        translations = [entry.msgstr]
    else:
        # Inspect every plural form. Indexing form 0 alone overlooked the
        # remaining forms and raised KeyError when that key was absent.
        translations = list(entry.msgstr_plural.values())

    if any("..." in text for text in translations):
        return [" * It's recommended to use the `…` symbol "
                "instead of three dots."]
    return []
| 66 | + |
| 67 | + |
| 68 | +########################## |
| 69 | +# PRINTF CHECK # |
| 70 | +########################## |
| 71 | + |
| 72 | + |
| 73 | +def _is_mixed(tags): |
| 74 | + """Checks if the list of tags contains mixed arguments,""" |
| 75 | + """non-positional %s and positional %1$s at the same time.""" |
| 76 | + |
| 77 | + have_positional = 0 |
| 78 | + have_nonpositional = 0 |
| 79 | + |
| 80 | + for tag in tags: |
| 81 | + if "$" in tag: |
| 82 | + have_positional += 1 |
68 | 83 | else:
|
69 |
| - print("Warning: Unknown language", sys.argv[i]) |
70 |
| -num_errors = 0 |
71 |
| -for file in sorted(files_to_check): |
72 |
| - print("Checking {}".format(file)) |
73 |
| - num_errors += check_po_file("lang/po/" + file) |
| 84 | + have_nonpositional += 1 |
| 85 | + |
| 86 | + return have_positional and have_nonpositional |
| 87 | + |
| 88 | + |
| 89 | +def _make_pos_arg(tags): |
| 90 | + """Changes arguments to positional ones.""" |
| 91 | + |
| 92 | + pos_tags = [] |
| 93 | + |
| 94 | + for i, tag in enumerate(tags, 1): |
| 95 | + if "$" in tag: |
| 96 | + pos_tags.append(tag) |
| 97 | + continue |
| 98 | + tag = f"%{i}${tag.split('%')[1]}" |
| 99 | + pos_tags.append(tag) |
| 100 | + |
| 101 | + return sorted(pos_tags) |
| 102 | + |
| 103 | + |
def check_printf(entry):
    """Check that printf arguments of the source and the translation agree.

    The conversion specifiers (%s, %1$d, ...) matched by PATTERN_PRINTF in
    entry.msgid are compared with those in entry.msgstr or, when that is
    empty, with those in the values of entry.msgstr_plural.

    Returns a list of warning messages. The list is empty when nothing was
    found or when the entry carries no translation at all.
    """
    # error messages
    msgs = []

    # Nothing to compare against. Without this guard the code below would
    # read a variable that was never assigned.
    if not entry.msgstr and not entry.msgstr_plural:
        return msgs

    # find all format specifiers in the original string
    tags_msgid = PATTERN_PRINTF.findall(entry.msgid)
    if entry.msgstr:
        # find all format specifiers in the translated string
        tags_msgstr = PATTERN_PRINTF.findall(entry.msgstr)
    else:
        # find all format specifiers in each plural translated string
        tags_msgstr_pl = [PATTERN_PRINTF.findall(v)
                          for v in entry.msgstr_plural.values()]
        # the first plural form serves as the reference for the others
        tags_msgstr = tags_msgstr_pl[0]

        # assume that all strings should have the same set of arguments
        if any(tags != tags_msgstr for tags in tags_msgstr_pl):
            msgs.append(" * Plural strings have"
                        " different sets of arguments;")

    # strings must not have %s and %1$s at the same time
    if _is_mixed(tags_msgid) or _is_mixed(tags_msgstr):
        msgs.append(" * Cannot mix positional and non-positional "
                    "arguments in format string;")
    # the comparisons below are meaningless once either problem was seen
    if msgs:
        return msgs

    # if everything else is good
    if tags_msgid == tags_msgstr:
        return msgs
    # and if everything is ok when replacing with positional arguments
    tags_msgid_pos = _make_pos_arg(tags_msgid)
    if tags_msgid_pos == _make_pos_arg(tags_msgstr):
        return msgs

    # check if the number of arguments matches
    if len(tags_msgid) == len(tags_msgstr):
        # if non-positional arguments are swapped
        # then suggest positional syntax
        if sorted(tags_msgid) == sorted(tags_msgstr):
            msgs.append(" * The types of arguments differ;\n"
                        " Use positional syntax:"
                        f" {_f(tags_msgid)} => {_f(tags_msgid_pos)};")
        # otherwise it's most likely just a typo
        else:
            msgs.append(" * The types of arguments differ:"
                        f" {_f(tags_msgid)} and {_f(tags_msgstr)};")
    else:
        msgs.append(" * The number of arguments differ:"
                    f" {_f(tags_msgid)} and {_f(tags_msgstr)};")

    return msgs
| 161 | + |
| 162 | + |
| 163 | +########################## |
| 164 | +# TAGS CHECK # |
| 165 | +########################## |
| 166 | + |
| 167 | + |
def check_tags(entry):
    """Check that <tags> in the source string match those in the translation.

    Tags are the substrings matched by PATTERN_TAG. Those of entry.msgid are
    compared, as sorted lists, with those of entry.msgstr or, when that is
    empty, with those of the values of entry.msgstr_plural.

    Returns a list of warning messages. The list is empty when nothing was
    found or when the entry carries no translation at all.
    """
    # error messages
    msgs = []

    # Nothing to compare against. Without this guard the code below would
    # read a variable that was never assigned.
    if not entry.msgstr and not entry.msgstr_plural:
        return msgs

    # find all <tags> in the original string
    tags_msgid = sorted(PATTERN_TAG.findall(entry.msgid))
    if entry.msgstr:
        # find all <tags> in the translated string
        tags_msgstr = sorted(PATTERN_TAG.findall(entry.msgstr))
    else:
        # find all <tags> for the plural translated strings
        tags_msgstr_pl = [sorted(PATTERN_TAG.findall(v))
                          for v in entry.msgstr_plural.values()]
        # the first plural form serves as the reference for the others
        tags_msgstr = tags_msgstr_pl[0]

        # assume that all strings should have the same set of tags
        if any(tags != tags_msgstr for tags in tags_msgstr_pl):
            msgs.append(" * Plural strings have different sets of tags;")

    # if everything else is good
    if tags_msgid == tags_msgstr:
        return msgs

    # Pair off common <tags> one occurrence at a time, so that `missing`
    # and `extra` keep only what is unique to each side. Fresh lists are
    # built instead of removing items from a list that is being iterated.
    extra = list(tags_msgstr)
    missing = []
    for tag in tags_msgid:
        if tag in extra:
            extra.remove(tag)
        else:
            missing.append(tag)

    # filter optional <tags>; walked back to front so that the optional-tag
    # messages come out in the same order as before
    required = []
    for tag in reversed(missing):
        if tag in TAGS_OPTIONAL:
            if CHECK_TAGS:
                msgs.append(f" * Missing OPTIONAL tag(s): {_f(tag)};")
        else:
            required.append(tag)
    required.reverse()

    # gather error messages to the list and return it
    if required:
        msgs.append(f" * Missing tag(s): {_f(required)};")
    if extra:
        msgs.append(f" * Bad/extra tag(s): {_f(extra)};")
    return msgs
| 215 | + |
| 216 | + |
| 217 | +########################## |
| 218 | +# MAIN FUNCTIONS # |
| 219 | +########################## |
| 220 | + |
| 221 | + |
def check_po_file(input_path):
    """Run the checks over every translated entry of one file and report.

    The file at input_path is loaded with polib. Entries whose msgid is
    listed in SKIP_STRINGS are ignored. The tag and printf checks always
    run; the ellipsis check runs only when CHECK_ELLIPSES is set. The
    report is printed to standard output and nothing is returned.
    """
    print(f"## Checking {input_path}")
    catalog = polib.pofile(input_path)

    reports = []
    for entry in catalog.translated_entries():
        # skip known false-positive strings
        if entry.msgid in SKIP_STRINGS:
            continue

        findings = []
        findings.extend(check_tags(entry))
        findings.extend(check_printf(entry))
        if CHECK_ELLIPSES:
            findings.extend(check_ellipses(entry))
        if not findings:
            continue

        # close each report with the entry itself inside a code fence
        findings.append(f"```\n{entry}```")
        reports.append("\n".join(findings))

    if not reports:
        print(" * Everything looks fine.")
    else:
        print(f" > Total: {len(reports)} warnings.")
        print("\n".join(reports), end="")
    print()
|
75 |
| -exit(num_errors) |
| 255 | + |
| 256 | + |
def main(locale_list):
    """Check the requested locales, or every known locale when none is given.

    locale_list holds locale names, which are the keys of PO_FILES. A name
    without a matching file is reported and skipped. Nothing is returned.
    """
    # The files are located through the relative path PO_PATH. The
    # conventional directory name is still accepted, and so is any working
    # directory that really contains PO_PATH, so a fork or a renamed
    # checkout is no longer turned away because of its name alone.
    cwd = pathlib.Path.cwd()
    if not (cwd.match("Cataclysm-DDA/") or PO_PATH.is_dir()):
        print("You must run the script from"
              " the root directory 'Cataclysm-DDA/'")
        return

    if not locale_list:
        for po_path in PO_FILES.values():
            check_po_file(po_path)
        return

    for locale in locale_list:
        po_path = PO_FILES.get(locale)
        if po_path:
            check_po_file(po_path)
        else:
            print(f"## Can't find locale '{locale}'. Skip.")
| 274 | + |
| 275 | + |
def _parse_args():
    """Define the command-line interface and parse the program arguments.

    Accepts any number of positional locale names plus the two switches
    -c1/--check-tags and -c2/--check-ellipses. The help epilog lists the
    keys of PO_FILES. Returns the namespace produced by argparse.
    """
    known_locales = ", ".join(PO_FILES.keys())
    parser = argparse.ArgumentParser(
        description="Validates translation files.",
        epilog=f"Available locales: {known_locales}",
    )
    parser.add_argument(
        "locale",
        nargs="*",
        metavar="locale-name",
        help="locale or list of locales",
    )
    parser.add_argument(
        "-c1", "--check-tags",
        action="store_true",
        help="also check optional tags",
    )
    parser.add_argument(
        "-c2", "--check-ellipses",
        action="store_true",
        help="also check ellipses",
    )
    return parser.parse_args()
| 288 | + |
| 289 | + |
if __name__ == "__main__":
    args = _parse_args()
    # the command-line switches replace the module-level defaults
    CHECK_TAGS, CHECK_ELLIPSES = args.check_tags, args.check_ellipses
    main(args.locale)
0 commit comments