Skip to content

Commit 9f7f123

Browse files
committed
Refactor check_spelling script to receive CLI arguments
1 parent 659f543 commit 9f7f123

File tree

1 file changed

+187
-16
lines changed

1 file changed

+187
-16
lines changed

scripts/check_spelling.py

Lines changed: 187 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,131 @@
11
"""
22
Script to check the spelling of one, many or all .po files based
33
on the custom dictionaries under the 'dictionaries/' directory.
4+
5+
Gives the option to print the detected errors and add new entries to the dictionary file.
6+
7+
Display information about usage with `python scripts/check_spelling.py --help`
48
"""
59

10+
import argparse
11+
import contextlib
12+
import functools
613
import os
14+
import multiprocessing
715
from pathlib import Path
816
import sys
917
import tempfile
1018

1119
import pospell
1220

1321

14-
def check_spell(po_files=None):
22+
def main():
    """
    Command line entry point of the spell-check script.

    Parses the command line, spell-checks the requested .po files and,
    depending on the given flags, prints the detected errors and/or writes
    the misspelled words to the dictionary file.

    Exits with status 0 when no errors were detected, -1 otherwise.
    """
    cli_args = create_parser().parse_args()

    errors = check_spell(cli_args.po_files)

    if cli_args.print_errors:
        print_errors(errors)

    if cli_args.write_entries:
        # The last field of every error tuple is used as the dictionary entry.
        words = {error[-1] for error in errors}
        write_new_entries(words)

    exit_status = -1 if errors else 0
    sys.exit(exit_status)
35+
36+
37+
def create_parser():
    """
    Create and configure the command line argument parser.

    The parser recognises three options:
    - ``-p`` / ``--print-errors``: flag, stored as ``print_errors``.
    - ``-w`` / ``--write-entries``: flag, stored as ``write_entries``.
    - ``-f`` / ``--po-files``: zero or more paths, stored as ``po_files``
      (an empty list when the option is omitted).

    returns:
    - argparse.ArgumentParser: the configured (not yet invoked) parser.
    """
    parser = argparse.ArgumentParser(
        usage="python check_spelling.py [options]",
        description="spell-check translated .po files and add new entries to the dictionary if needed.",
    )

    parser.add_argument(
        "-p",
        "--print-errors",
        action="store_true",
        dest="print_errors",
        help="print the detected errors of the spell-check",
    )
    parser.add_argument(
        "-w",
        "--write-entries",
        action="store_true",
        dest="write_entries",
        help="write the new detected entries in the dictionary file",
    )
    parser.add_argument(
        "-f",
        "--po-files",
        dest="po_files",
        nargs="*",
        default=[],
        help="list of .po files to spell-check, if not given checks all po files",
    )

    return parser
1773

18-
If no po_files are given, check spell in all files.
74+
75+
def check_spell(po_files=None):
    """
    Check spell in the given list of po_files.

    If no po_files are given (``None`` or an empty list), every file matching
    ``*/*.po`` relative to the current directory is checked.

    args:
        po_files: list of po_files paths.

    returns:
    - list: list of tuples containing detected errors.
    """
    entries = read_dictionary_entries()

    # The merged dictionary lives in a temporary file that is only available
    # inside this block, so the spell-check must run before leaving it.
    with write_entries_to_tmp_file(entries) as named_tmp_file:
        # Run pospell either against all files or the files given on the command line
        if not po_files:
            po_files = Path(".").glob("*/*.po")

        return detect_errors(po_files, named_tmp_file.name)
95+
96+
97+
def read_dictionary_entries():
    """
    Read the entries in the dictionary files under `dictionaries` directory.

    Every ``*.txt`` file in that directory is read as UTF-8. Each line is
    stripped of surrounding whitespace and blank lines are ignored.

    returns:
    - set: a set of string entries
    """
    entries = set()

    for filename in Path("dictionaries").glob("*.txt"):
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # default encoding.
        with open(filename, "r", encoding="utf-8") as f:
            entries.update(
                stripped_line
                for stripped_line in (line.strip() for line in f)
                if stripped_line
            )

    return entries
116+
117+
118+
@contextlib.contextmanager
119+
def write_entries_to_tmp_file(entries):
120+
"""
121+
Write the given entries to a named temporary file and yield the file.
122+
123+
args:
124+
entries: a set of entries (strings) to write to the temporary file.
125+
126+
returns:
127+
- tempfile.NamedTemporaryFile: the temporary file with the given entries.
128+
"""
38129
with tempfile.NamedTemporaryFile(suffix="_merged_dict.txt") as named_tmp_file:
39130
for e in entries:
40131
named_tmp_file.write(f"{e}\n".encode())
@@ -44,17 +135,97 @@ def check_spell(po_files=None):
44135

45136
named_tmp_file.seek(0)
46137

47-
# Run pospell either against all files or the file given on the command line
48-
if not po_files:
49-
po_files = Path(".").glob("*/*.po")
138+
yield named_tmp_file
139+
140+
141+
# Clone of pospell.spell_check tailored to current needs.
142+
# source: https://git.afpy.org/AFPy/pospell/src/branch/main/pospell.py
143+
def detect_errors(po_files, personal_dict):
    """
    Check for spelling mistakes in the given po_files.

    The po files are first converted to text in parallel, then the resulting
    lines are split into chunks that are spell-checked in parallel with the
    "el_GR" language and the given personal dictionary.

    args:
        po_files: list of strings or Path objects pointing to po files.
        personal_dict: name of file containing dictionary entries.

    returns:
    - list: a list of tuples with the detected errors
    """
    # os.cpu_count() returns None when the number of CPUs cannot be
    # determined; fall back to a single worker so the chunk arithmetic
    # below keeps working.
    jobs = os.cpu_count() or 1

    # Pool.__exit__ calls terminate() instead of close(), we need the latter,
    # which ensures the processes' atexit handlers execute fully, which in
    # turn lets coverage write the sub-processes' coverage information
    pool = multiprocessing.Pool(jobs)

    try:
        input_lines = pospell.flatten(
            pool.map(
                functools.partial(pospell.po_to_text, drop_capitalized=False),
                po_files,
            )
        )

        if not input_lines:
            return []

        # Distribute input lines across workers (ceiling division so that
        # at most `jobs` chunks are produced)
        lines_per_job = (len(input_lines) + jobs - 1) // jobs
        chunked_inputs = [
            input_lines[i : i + lines_per_job]
            for i in range(0, len(input_lines), lines_per_job)
        ]
        errors = pospell.flatten(
            pool.map(
                functools.partial(pospell.run_hunspell, "el_GR", personal_dict),
                chunked_inputs,
            )
        )
    finally:
        pool.close()
        pool.join()

    return errors
188+
189+
190+
def print_errors(errors):
    """
    Print the given errors with the following format:
    filename:linenumber:word

    A "Detected errors:" header is printed first, but only when there is at
    least one error. Each error is printed on its own tab-indented line with
    its fields joined by ":".

    args:
        errors: list of tuples with detected errors.
    """
    if errors:
        print("\nDetected errors:")

    for error in errors:
        print("\t" + ":".join(map(str, error)))
203+
204+
205+
def write_new_entries(new_entries):
    """
    Write the given entries to the dictionary file respecting the
    alphabetical sorting.

    The entries currently found in the dictionary files are merged with the
    new ones, sorted and written, one per line, to `dictionaries/main.txt`
    (the file is overwritten). The new entries are then reported on stdout.

    args:
        new_entries: set of entries (strings) to write to the dictionary file.
    """
    entries = read_dictionary_entries()
    entries.update(new_entries)

    # Encode explicitly as UTF-8 instead of relying on the platform's default
    # encoding.
    with open(Path("dictionaries", "main.txt"), "w", encoding="utf-8") as file:
        file.writelines(f"{entry}\n" for entry in sorted(entries))

    if new_entries:
        print("\nWrote the below new entries to main.txt:")

    # Sorted so that the report is deterministic.
    for e in sorted(new_entries):
        print(f"\t {e}")
55228

56229

57230
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)