Skip to content

Commit 6d28f7d

Browse files
authored
Update translation validator script (#82697)
2 parents 38de542 + 12ea68e commit 6d28f7d

File tree

1 file changed

+290
-71
lines changed

1 file changed

+290
-71
lines changed

tools/check_translation_tags.py

100644 → 100755
Lines changed: 290 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,294 @@
11
#!/usr/bin/env python3
2-
import polib
3-
import os
2+
3+
import argparse
44
import re
5-
import sys
6-
7-
8-
# Matcher for `<tag>`-style markers: lowercase letters, digits, `/` and `_`
# between angle brackets.
pattern = re.compile(r'<[a-z/0-9_]+>')

# Markers that are subtracted from the collected tag set in init_tags().
ignore_tags = {
    '<1>', '<2>', '<3>', '<4>',
    '<f>', '<n>', '<q>', '<r>', '<s>', '<u>',
    '<more>', '<empty>',
}

# Tags known to the checker; filled in by init_tags().
tags = set()
12-
13-
14-
def init_tags():
    """Populate the global ``tags`` set from the translation template.

    Every marker matched by ``pattern`` in any msgid of
    ``./lang/po/cataclysm-dda.pot`` is collected, then the members of
    ``ignore_tags`` are subtracted.
    """
    global tags
    template = polib.pofile("./lang/po/cataclysm-dda.pot")
    for message in template:
        # set.update() de-duplicates, so no intermediate set is needed
        tags.update(pattern.findall(message.msgid))
    tags = tags - ignore_tags
21-
22-
23-
def extract_tags(msg):
    """Return the set of known tags (members of ``tags``) that occur in msg."""
    return {found for found in pattern.findall(msg) if found in tags}
30-
31-
32-
def check_message(entry):
    """Return the known tags present in entry.msgid but absent from msgstr.

    An entry with an empty msgstr yields an empty set.
    """
    if not entry.msgstr:
        return set()
    return extract_tags(entry.msgid) - extract_tags(entry.msgstr)
40-
41-
42-
def check_po_file(file):
    """Print every translated entry of a .po file that lost a tag.

    Returns the number of entries for which at least one tag is missing.
    """
    bad_entries = 0
    for entry in polib.pofile(file).translated_entries():
        lost = check_message(entry)
        if not lost:
            continue
        # newlines are escaped so each report stays on a single line
        source_text = entry.msgid.replace("\n", "\\n")
        translated_text = entry.msgstr.replace("\n", "\\n")
        print("Tag(s) {} missing in translation: {} => {}".format(
            lost, source_text, translated_text))
        bad_entries += 1
    return bad_entries
54-
55-
56-
init_tags()
57-
po_files = []
58-
for file in sorted(os.listdir("lang/po")):
59-
if file.endswith(".po") and not file.endswith("en.po"):
60-
po_files.append(file)
61-
files_to_check = []
62-
if len(sys.argv) == 1:
63-
files_to_check = po_files
64-
else:
65-
for i in range(1, len(sys.argv)):
66-
if sys.argv[i] + ".po" in po_files:
67-
files_to_check.append(sys.argv[i] + ".po")
5+
import pathlib
6+
7+
import polib
8+
9+
10+
# Optional checks; both are off by default and are overwritten from the
# command-line flags in the `__main__` section.
CHECK_TAGS = False
CHECK_ELLIPSES = False

# `<tag>` markers and printf-style format arguments (`%s`, `%1$d`, `%.2f`).
PATTERN_TAG = re.compile(r"<[a-z/0-9_]+>")
PATTERN_PRINTF = re.compile(r"%[0-9$.]*[diufegxscp]")

# Maps a locale name (file stem) to its .po path,
# e.g. {"ar": Path("lang/po/ar.po"), ...}
PO_PATH = pathlib.Path("lang/po/")
PO_FILES = {po_file.stem: po_file for po_file in sorted(PO_PATH.glob("*.po"))}


# tags that can be safely omitted in translation
TAGS_OPTIONAL = (
    "<zombie>",
    "<zombies>",
    "<name_b>",
    "<name_g>",
    "<freaking>",
    "<the_cataclysm>",
    "<granny_name_g>",
    "<mypronoun>",
    "<mypossesivepronoun>",
)

# known false-positive strings. FIXME?
SKIP_STRINGS = (
    "<empty>",
    "Even if you climb down safely, you will fall "
    "<color_yellow>at least %d story</color>.",
    "Pointed in your direction, the %s emits an IFF warning beep.",
    "%s points in your direction and emits an IFF warning beep.",
    "You hear a warning beep.",
)
39+
40+
41+
def _f(input_list):
42+
"""Formats list"""
43+
""" ['a', 'b'] => `a b` """
44+
45+
if not input_list:
46+
return "none"
47+
if type(input_list) is str:
48+
return f"`{input_list}`"
49+
return f"`{' '.join(input_list)}`"
50+
51+
52+
##########################
53+
# ELLIPSES CHECK #
54+
##########################
55+
56+
57+
def check_ellipses(entry):
    """Check if a translation uses three dots instead of the ellipsis symbol.

    The check only applies when the source string (``entry.msgid``) contains
    `…`. The singular translation is inspected when present; otherwise every
    plural form in ``entry.msgstr_plural`` is inspected, not just the first.

    Returns a list with one warning message, or an empty list.
    """
    if "…" not in entry.msgid:
        return []

    if entry.msgstr:
        translations = [entry.msgstr]
    else:
        # An entry without any plural forms simply has nothing to check.
        translations = list((entry.msgstr_plural or {}).values())

    if any("..." in text for text in translations):
        return [" * It's recommended to use the `…` symbol "
                "instead of three dots."]
    return []
66+
67+
68+
##########################
69+
# PRINTF CHECK #
70+
##########################
71+
72+
73+
def _is_mixed(tags):
74+
"""Checks if the list of tags contains mixed arguments,"""
75+
"""non-positional %s and positional %1$s at the same time."""
76+
77+
have_positional = 0
78+
have_nonpositional = 0
79+
80+
for tag in tags:
81+
if "$" in tag:
82+
have_positional += 1
6883
else:
69-
print("Warning: Unknown language", sys.argv[i])
70-
num_errors = 0
71-
for file in sorted(files_to_check):
72-
print("Checking {}".format(file))
73-
num_errors += check_po_file("lang/po/" + file)
84+
have_nonpositional += 1
85+
86+
return have_positional and have_nonpositional
87+
88+
89+
def _make_pos_arg(tags):
90+
"""Changes arguments to positional ones."""
91+
92+
pos_tags = []
93+
94+
for i, tag in enumerate(tags, 1):
95+
if "$" in tag:
96+
pos_tags.append(tag)
97+
continue
98+
tag = f"%{i}${tag.split('%')[1]}"
99+
pos_tags.append(tag)
100+
101+
return sorted(pos_tags)
102+
103+
104+
def check_printf(entry):
    """Check that printf-style arguments match in source and translation.

    Compares the format arguments (``%s``, ``%1$d``, ...) found by
    PATTERN_PRINTF in ``entry.msgid`` with those in ``entry.msgstr`` or,
    when that is empty, in the ``entry.msgstr_plural`` forms.

    Returns a list of warning messages; the list is empty when the
    arguments agree or when the entry has no translation at all.
    """
    # error messages
    msgs = []

    # find all format arguments in the original string
    tags_msgid = PATTERN_PRINTF.findall(entry.msgid)
    # find all format arguments in the translated string
    if entry.msgstr:
        tags_msgstr = PATTERN_PRINTF.findall(entry.msgstr)
    # find all format arguments in each plural translated string
    elif entry.msgstr_plural:
        tags_msgstr_pl = [PATTERN_PRINTF.findall(v)
                          for v in entry.msgstr_plural.values()]
        tags_msgstr = tags_msgstr_pl[0]

        # assume that all strings should have the same set of arguments
        if any(form != tags_msgstr for form in tags_msgstr_pl):
            msgs.append(" * Plural strings have"
                        " different sets of arguments;")
    else:
        # Nothing is translated, so there is nothing to compare; without
        # this branch `tags_msgstr` would be unbound below.
        return msgs

    # strings must not have %s and %1$s at the same time
    if _is_mixed(tags_msgid) or _is_mixed(tags_msgstr):
        msgs.append(" * Cannot mix positional and non-positional "
                    "arguments in format string;")
    # the remaining comparisons are not meaningful once either problem
    # above has been found
    if msgs:
        return msgs

    # identical argument lists: nothing to report
    if tags_msgid == tags_msgstr:
        return msgs
    # also fine if both sides agree once rewritten as positional arguments
    tags_msgid_pos = _make_pos_arg(tags_msgid)
    if tags_msgid_pos == _make_pos_arg(tags_msgstr):
        return msgs

    # check if the number of arguments matches
    if len(tags_msgid) == len(tags_msgstr):
        # if non-positional arguments are swapped
        # then suggest positional syntax
        if sorted(tags_msgid) == sorted(tags_msgstr):
            msgs.append(" * The types of arguments differ;\n"
                        " Use positional syntax:"
                        f" {_f(tags_msgid)} => {_f(tags_msgid_pos)};")
        # otherwise it's most likely just a typo
        else:
            msgs.append(" * The types of arguments differ:"
                        f" {_f(tags_msgid)} and {_f(tags_msgstr)};")
    else:
        msgs.append(" * The number of arguments differ:"
                    f" {_f(tags_msgid)} and {_f(tags_msgstr)};")

    return msgs
161+
162+
163+
##########################
164+
# TAGS CHECK #
165+
##########################
166+
167+
168+
def check_tags(entry):
    """Check if <tags> in the source string match those in the translation.

    Tags are found with PATTERN_TAG in ``entry.msgid`` and in
    ``entry.msgstr`` or, when that is empty, in the ``entry.msgstr_plural``
    forms (the first form is the one compared against the source).

    Tags listed in TAGS_OPTIONAL may be missing from the translation; they
    are reported only when CHECK_TAGS is enabled.

    Returns a list of warning messages; the list is empty when the tags
    agree or when the entry has no translation at all.
    """
    # error messages
    msgs = []

    # find all <tags> in the original string
    tags_msgid = sorted(PATTERN_TAG.findall(entry.msgid))
    # find all <tags> in the translated string
    if entry.msgstr:
        tags_msgstr = sorted(PATTERN_TAG.findall(entry.msgstr))
    # find all <tags> for the plural translated strings
    elif entry.msgstr_plural:
        tags_msgstr_pl = [sorted(PATTERN_TAG.findall(v))
                          for v in entry.msgstr_plural.values()]
        tags_msgstr = tags_msgstr_pl[0]

        # assume that all strings should have the same set of tags
        if any(form != tags_msgstr for form in tags_msgstr_pl):
            msgs.append(" * Plural strings have different sets of tags;")
    else:
        # Nothing is translated, so there is nothing to compare; without
        # this branch `tags_msgstr` would be unbound below.
        return msgs

    # if everything else is good
    if tags_msgid == tags_msgstr:
        return msgs

    # Cancel out common <tags> one occurrence at a time, working on copies
    # so that no list is modified while it is being iterated.
    # `missing` keeps tags only the source has, `extra` those only the
    # translation has.
    missing = list(tags_msgid)
    extra = list(tags_msgstr)
    for tag in tags_msgid:
        if tag in extra:
            missing.remove(tag)
            extra.remove(tag)

    # optional <tags> are reported on request only and never count as
    # missing
    if CHECK_TAGS:
        for tag in reversed(missing):
            if tag in TAGS_OPTIONAL:
                msgs.append(f" * Missing OPTIONAL tag(s): {_f(tag)};")
    missing = [tag for tag in missing if tag not in TAGS_OPTIONAL]

    # gather error messages to the list and return it
    if missing:
        msgs.append(f" * Missing tag(s): {_f(missing)};")
    if extra:
        msgs.append(f" * Bad/extra tag(s): {_f(extra)};")
    return msgs
215+
216+
217+
##########################
218+
# MAIN FUNCTIONS #
219+
##########################
220+
221+
222+
def check_po_file(input_path):
    """Load one .po file, run the checks on each translated entry and
    print the resulting report.

    Entries whose msgid is listed in SKIP_STRINGS are not checked. The
    ellipsis check runs only when CHECK_ELLIPSES is enabled.
    """
    print(f"## Checking {input_path}")
    catalog = polib.pofile(input_path)

    reports = []
    for entry in catalog.translated_entries():
        # skip known false-positive strings
        if entry.msgid in SKIP_STRINGS:
            continue

        findings = check_tags(entry) + check_printf(entry)
        if CHECK_ELLIPSES:
            findings += check_ellipses(entry)
        if not findings:
            continue

        # the offending entry itself is appended as a fenced block
        findings.append(f"```\n{entry}```")
        reports.append("\n".join(findings))

    if not reports:
        print(" * Everything looks fine.")
    else:
        print(f" > Total: {len(reports)} warnings.")
        print("\n".join(reports), end="")
    print()
75-
exit(num_errors)
255+
256+
257+
def main(locale_list):
    """Check the .po files of the requested locales.

    With an empty ``locale_list`` every file in PO_FILES is checked.
    A locale that is not in PO_FILES is reported and skipped. Nothing is
    checked unless the current directory matches 'Cataclysm-DDA/'.
    """
    if not pathlib.Path.cwd().match("Cataclysm-DDA/"):
        print("You must run the script from"
              " the root directory 'Cataclysm-DDA/'")
        return

    if not locale_list:
        for po_path in PO_FILES.values():
            check_po_file(po_path)
        return

    for locale in locale_list:
        po_path = PO_FILES.get(locale)
        if po_path:
            check_po_file(po_path)
        else:
            print(f"## Can't find locale '{locale}'. Skip.")
274+
275+
276+
def _parse_args():
    """Build the command-line parser and return the parsed arguments.

    The result has ``locale`` (a possibly empty list of locale names),
    ``check_tags`` and ``check_ellipses`` (booleans).
    """
    available = ', '.join(PO_FILES)
    parser = argparse.ArgumentParser(
        description="Validates translation files.",
        epilog=f"Available locales: {available}",
    )
    parser.add_argument(
        "locale",
        nargs="*",
        metavar="locale-name",
        help="locale or list of locales",
    )
    parser.add_argument(
        "-c1", "--check-tags",
        action="store_true",
        help="also check optional tags",
    )
    parser.add_argument(
        "-c2", "--check-ellipses",
        action="store_true",
        help="also check ellipses",
    )
    return parser.parse_args()
288+
289+
290+
if __name__ == "__main__":
    # Publish the optional-check switches as module globals before running,
    # since the check functions read CHECK_TAGS / CHECK_ELLIPSES directly.
    cli = _parse_args()
    CHECK_TAGS, CHECK_ELLIPSES = cli.check_tags, cli.check_ellipses
    main(cli.locale)

0 commit comments

Comments
 (0)