Skip to content

Commit 9bd8b84

Browse files
not sure if fixed
1 parent 50538f7 commit 9bd8b84

File tree

1 file changed

+4
-19
lines changed

1 file changed

+4
-19
lines changed

scripts/add_to_dictionary.py

Lines changed: 4 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,18 +1,10 @@
1-
"""
2-
Script to update the custom dictionary 'main.txt' with new words from a given .po file.
3-
4-
The script scans a specified .po file, ignoring certain metadata lines (e.g., lines starting with "#:").
5-
It extracts all unique Greek and English words, compares them against the custom dictionary
6-
under the 'dictionaries/' directory (sibling to the 'scripts/' directory), and adds any new words in alphabetical order.
7-
"""
8-
91
import sys
102
import os
113
import re
124

135
def scan_and_update(file_path):
146
"""
15-
Scan the given .po file, extract words, and update the main dictionary.
7+
Scan the given .po file, extract words from msgstr blocks, and update the main dictionary.
168
179
If the dictionary does not exist, it creates a new one.
1810
@@ -47,21 +39,15 @@ def scan_and_update(file_path):
4739
print(f"Input file {file_path} not found.")
4840
return 0
4941

50-
# Regular expression to ignore metadata lines like #: reference/executionmodel.rst:145
51-
ignore_pattern = re.compile(r"^#:")
52-
5342
# Regular expression to include accented Greek letters
54-
word_pattern = re.compile(r'\b[a-zA-Zα-ωά-ώΑ-ΩΆ-Ώ]+\b', re.UNICODE)
43+
word_pattern = re.compile(r'\b[a-zA-Z\u03B1-\u03C9\u0386-\u03CE]+\b', re.UNICODE)
5544

5645
new_words = set()
5746
entry_buffer = []
5847
collecting_msgstr = False
5948

60-
# Step 4: Extract words from the .po file
49+
# Step 4: Extract words only from msgstr blocks
6150
for line in lines:
62-
if ignore_pattern.match(line):
63-
continue # Ignore metadata lines
64-
6551
# Handle msgstr entries
6652
if line.startswith("msgstr"):
6753
collecting_msgstr = True
@@ -81,7 +67,6 @@ def scan_and_update(file_path):
8167
entry_buffer = []
8268
else:
8369
# Continue collecting multiline msgstr
84-
# Remove surrounding quotes and append
8570
entry_buffer.append(line.strip().strip('"'))
8671

8772
# Handle any remaining buffered text after the loop
@@ -115,4 +100,4 @@ def scan_and_update(file_path):
115100
print(f"The provided path '{file_path}' is not a valid file.")
116101
sys.exit(1)
117102
# Process the input file and update the dictionary
118-
new_word_count = scan_and_update(file_path)
103+
new_word_count = scan_and_update(file_path)

0 commit comments

Comments
 (0)