1
- """
2
- Script to update the custom dictionary 'main.txt' with new words from a given .po file.
3
-
4
- The script scans a specified .po file, ignoring certain metadata lines (e.g., lines starting with "#:").
5
- It extracts all unique Greek and English words, compares them against the custom dictionary
6
- under the 'dictionaries/' directory (sibling to the 'scripts/' directory), and adds any new words in alphabetical order.
7
- """
8
-
9
1
import sys
10
2
import os
11
3
import re
12
4
13
5
def scan_and_update (file_path ):
14
6
"""
15
- Scan the given .po file, extract words, and update the main dictionary.
7
+ Scan the given .po file, extract words from msgstr blocks, and update the main dictionary.
16
8
17
9
If the dictionary does not exist, it creates a new one.
18
10
@@ -47,21 +39,15 @@ def scan_and_update(file_path):
47
39
print (f"Input file { file_path } not found." )
48
40
return 0
49
41
50
- # Regular expression to ignore metadata lines like #: reference/executionmodel.rst:145
51
- ignore_pattern = re .compile (r"^#:" )
52
-
53
42
# Regular expression to include accented Greek letters
54
- word_pattern = re .compile (r'\b[a-zA-Zα-ωά-ώΑ-ΩΆ-Ώ ]+\b' , re .UNICODE )
43
+ word_pattern = re .compile (r'\b[a-zA-Z\u03B1-\u03C9\u0386-\u03CE ]+\b' , re .UNICODE )
55
44
56
45
new_words = set ()
57
46
entry_buffer = []
58
47
collecting_msgstr = False
59
48
60
- # Step 4: Extract words from the .po file
49
+ # Step 4: Extract words only from msgstr blocks
61
50
for line in lines :
62
- if ignore_pattern .match (line ):
63
- continue # Ignore metadata lines
64
-
65
51
# Handle msgstr entries
66
52
if line .startswith ("msgstr" ):
67
53
collecting_msgstr = True
@@ -81,7 +67,6 @@ def scan_and_update(file_path):
81
67
entry_buffer = []
82
68
else :
83
69
# Continue collecting multiline msgstr
84
- # Remove surrounding quotes and append
85
70
entry_buffer .append (line .strip ().strip ('"' ))
86
71
87
72
# Handle any remaining buffered text after the loop
@@ -115,4 +100,4 @@ def scan_and_update(file_path):
115
100
print (f"The provided path '{ file_path } ' is not a valid file." )
116
101
sys .exit (1 )
117
102
# Process the input file and update the dictionary
118
- new_word_count = scan_and_update (file_path )
103
+ new_word_count = scan_and_update (file_path )
0 commit comments