1
1
"""
2
2
Script to check the spelling of one, many or all .po files based
3
3
on the custom dictionaries under the 'dictionaries/' directory.
4
+
5
+ Gives the option to print the detected errors and add new entries to the dictionary file.
6
+
7
+ Display information about usage with `python scripts/check_spelling.py --help`
4
8
"""
5
9
10
+ import argparse
11
+ import contextlib
12
+ import functools
6
13
import os
14
+ import multiprocessing
7
15
from pathlib import Path
8
16
import sys
9
17
import tempfile
10
18
11
19
import pospell
12
20
13
21
14
- def check_spell (po_files = None ):
22
def main():
    """
    Command line entry point.

    Parses the command line, spell-checks the requested .po files and,
    depending on the given flags, prints the detected errors and/or writes
    the unknown words to the dictionary file.

    Exits the process with status 0 when no errors were detected and with
    -1 otherwise.
    """
    options = create_parser().parse_args()
    found = check_spell(options.po_files)

    if options.print_errors:
        print_errors(found)

    if options.write_entries:
        # The last element of every error tuple is used as the new entry.
        unknown_words = set()
        for err in found:
            unknown_words.add(err[-1])
        write_new_entries(unknown_words)

    exit_code = -1 if found else 0
    sys.exit(exit_code)
35
+
36
+
37
def create_parser():
    """
    Build the command line argument parser of the script.

    The parser knows three options:
        -p / --print-errors:  boolean flag, stored as `print_errors`.
        -w / --write-entries: boolean flag, stored as `write_entries`.
        -f / --po-files:      zero or more paths, stored as `po_files`
                              (an empty list when the option is not given).

    returns:
        - argparse.ArgumentParser: the configured parser (nothing is parsed here).
    """
    cli = argparse.ArgumentParser(
        usage="python check_spelling.py [options]",
        description="spell-check translated .po files and add new entries to the dictionary if needed.",
    )

    # Both boolean switches share the same shape, so declare them as data.
    boolean_flags = (
        (
            "-p",
            "--print-errors",
            "print_errors",
            "print the detected errors of the spell-check",
        ),
        (
            "-w",
            "--write-entries",
            "write_entries",
            "write the new detected entries in the dictionary file",
        ),
    )
    for short_name, long_name, destination, help_text in boolean_flags:
        cli.add_argument(
            short_name,
            long_name,
            action="store_true",
            dest=destination,
            help=help_text,
        )

    cli.add_argument(
        "-f",
        "--po-files",
        dest="po_files",
        nargs="*",
        default=[],
        help="list of .po files to spell-check, if not given checks all po files",
    )

    return cli
17
73
18
- If no po_files are given, check spell in all files.
74
+
75
def check_spell(po_files=None):
    """
    Check spell in the given list of po_files.

    If no po_files are given (`None` or an empty list), every file matching
    `*/*.po` relative to the current working directory is checked.

    args:
        po_files: list of po_files paths (optional).

    returns:
        - list: list of tuples containing detected errors.
    """
    entries = read_dictionary_entries()

    # The merged dictionary is handed to the checker by file name, so the
    # check has to run while the temporary file is still open.
    with write_entries_to_tmp_file(entries) as named_tmp_file:
        # Run pospell either against all files or the files given on the command line
        if not po_files:
            po_files = Path(".").glob("*/*.po")

        return detect_errors(po_files, named_tmp_file.name)
95
+
96
+
97
def read_dictionary_entries():
    """
    Collect the entries of every `*.txt` file under the `dictionaries` directory.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are ignored. Duplicates collapse because a set is returned.

    returns:
        - set: a set of string entries
    """
    words = set()

    for dictionary_path in Path("dictionaries").glob("*.txt"):
        with open(dictionary_path, "r") as handle:
            for raw_line in handle:
                word = raw_line.strip()
                if word:
                    words.add(word)

    return words
116
+
117
+
118
+ @contextlib .contextmanager
119
+ def write_entries_to_tmp_file (entries ):
120
+ """
121
+ Write the given entries to a named temporary file and yield the file.
122
+
123
+ args:
124
+ entries: a set of entries (strings) to write to the temporary file.
125
+
126
+ returns:
127
+ - tempfile.NamedTemporaryFile: the temporary file with the given entries.
128
+ """
38
129
with tempfile .NamedTemporaryFile (suffix = "_merged_dict.txt" ) as named_tmp_file :
39
130
for e in entries :
40
131
named_tmp_file .write (f"{ e } \n " .encode ())
@@ -44,17 +135,97 @@ def check_spell(po_files=None):
44
135
45
136
named_tmp_file .seek (0 )
46
137
47
- # Run pospell either against all files or the file given on the command line
48
- if not po_files :
49
- po_files = Path ("." ).glob ("*/*.po" )
138
+ yield named_tmp_file
139
+
140
+
141
+ # Clone of pospell.spell_check tailored to current needs.
142
+ # source: https://git.afpy.org/AFPy/pospell/src/branch/main/pospell.py
143
def detect_errors(po_files, personal_dict):
    """
    Check for spelling mistakes in the given po_files.

    The work is done in two parallel passes over a process pool: first every
    po file is converted with `pospell.po_to_text`, then the resulting lines
    are split into chunks that are each passed to `pospell.run_hunspell`
    with the "el_GR" language and the given personal dictionary.

    args:
        po_files: list of strings or Path objects pointing to po files.
        personal_dict: name of file containing dictionary entries.

    returns:
        - list: a list of tuples with the detected errors
          (empty when the po files produced no input lines).
    """
    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to a single worker so the chunk arithmetic below stays valid.
    jobs = os.cpu_count() or 1

    # Pool.__exit__ calls terminate() instead of close(), we need the latter,
    # which ensures the processes' atexit handlers execute fully, which in
    # turn lets coverage write the sub-processes' coverage information
    pool = multiprocessing.Pool(jobs)

    try:
        input_lines = pospell.flatten(
            pool.map(
                functools.partial(pospell.po_to_text, drop_capitalized=False),
                po_files,
            )
        )

        if not input_lines:
            return []

        # Distribute input lines across workers (ceiling division so that
        # at most `jobs` chunks are produced).
        lines_per_job = (len(input_lines) + jobs - 1) // jobs
        chunked_inputs = [
            input_lines[i : i + lines_per_job]
            for i in range(0, len(input_lines), lines_per_job)
        ]
        errors = pospell.flatten(
            pool.map(
                functools.partial(pospell.run_hunspell, "el_GR", personal_dict),
                chunked_inputs,
            )
        )
    finally:
        pool.close()
        pool.join()

    return errors
188
+
189
+
190
def print_errors(errors):
    """
    Print a header followed by one line per detected error.

    Every error is rendered by joining its elements with ":", e.g.
        filename:linenumber:word

    Nothing is printed when there are no errors.

    args:
        errors: list of tuples with detected errors.
    """
    if not errors:
        return

    print("\n Detected errors:")

    for err in errors:
        fields = [str(field) for field in err]
        print("\t " + ":".join(fields))
203
+
204
+
205
def write_new_entries(new_entries):
    """
    Write the given entries to the dictionary file respecting the
    alphabetical sorting.

    The existing entries are read with `read_dictionary_entries`, merged with
    the new ones and the sorted result overwrites `dictionaries/main.txt`,
    one entry per line. The new entries are then listed on stdout.

    args:
        new_entries: set of entries (strings) to write to the dictionary file.
    """
    entries = read_dictionary_entries()
    entries.update(new_entries)

    with open(Path("dictionaries", "main.txt"), "w") as file:
        # Terminate every entry with a bare newline so that no trailing
        # whitespace ends up in the dictionary file.
        file.writelines(f"{e}\n" for e in sorted(entries))

    if len(new_entries) > 0:
        print("\n Wrote the below new entries to main.txt:")

        for e in new_entries:
            print(f"\t {e} ")
55
228
56
229
57
230
# Only run the command line interface when executed as a script.
if __name__ == "__main__":
    main()
0 commit comments