-
Notifications
You must be signed in to change notification settings - Fork 3
Create check_langs_in_po.py #167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
rffontenelle
wants to merge
13
commits into
main
Choose a base branch
from
check-langs
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from 6 commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
f7349d8
Create check_langs_in_po.py
rffontenelle 9b03e62
Add Ukrainian, fix lint issues, reorganize patterns parsing
rffontenelle 528a900
Remove leftovers of unnecessary plural-forms handling
rffontenelle fd1d74c
Lint
rffontenelle 098d61d
Lint
rffontenelle 6f07e31
Add ignore words
rffontenelle cc01eda
Add delete command
rffontenelle c44b84e
lint
rffontenelle 85a0d54
Replace hardcoded pattern with pyfranc and iso639 libs
rffontenelle 2b2020e
lint
rffontenelle 81275ea
Lint
rffontenelle fd59687
lint
rffontenelle 30c28b5
Lint
rffontenelle File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
#!/usr/bin/env python3 | ||
""" | ||
Check .po files for presence of specific language patterns in translated strings. | ||
Languages currently checked: Russian, Polish, Ukrainian. | ||
""" | ||
import argparse
import re
import sys
from pathlib import Path

import polib
|
||
# Character patterns
# Each constant is a fragment that build_pattern() concatenates inside a single
# regex character class ("[...]"), so ranges such as "A-B" are allowed here.
# RUSSIAN covers the whole U+0400-U+04FF range, i.e. every Cyrillic letter in
# that block and not only the Russian alphabet.
RUSSIAN = r"\u0400-\u04FF" # Full Cyrillic block
# Polish letters carrying diacritics.
# NOTE(review): "Ó"/"ó" is not listed - presumably because it also occurs in
# other languages; confirm this is intentional.
POLISH = r"ĄĆĘŁŃŚŹŻąćęłńśźż"
# Every letter below lies inside the range already covered by RUSSIAN, so this
# fragment only changes the result when the Russian check is disabled.
UKRAINIAN = r"ҐЄІЇґєії"
|
||
# Words to ignore if found in msgstr
# should_ignore() tests each item with a plain, case-sensitive substring check
# ("word in text"), and an entry whose msgstr contains any of them is skipped
# entirely. Most items appear to be personal names that legitimately contain
# Polish or Cyrillic letters.
IGNORE_WORDS = [
    "Charles-François",
    "Gruszczyński",
    "Jędrzejewski-Szmek",
    "Kołodziej",
    "Коренберг Марк",
    "Łukasz",
    "Łapkiewicz",
    "Марк Коренберг",
    "Michał",
    "Ożarowski",
    "Sławecki",
    "Stanisław",
    "Tvrtković",
    "Wołodźko",
    # Single letter: any msgstr containing "Є" anywhere is skipped.
    "Є",
]
|
||
|
||
def build_pattern(enable_russian=True, enable_polish=True, enable_ukrainian=True):
    """
    Compile one character-class regex covering every enabled language.

    Each flag switches the matching module-level fragment (RUSSIAN, POLISH,
    UKRAINIAN) on or off. Returns None when every flag is false, otherwise a
    compiled pattern that matches a single character from any enabled fragment.
    """
    # Pair each flag with its fragment; order matches the original if-chain.
    toggles = (
        (enable_russian, RUSSIAN),
        (enable_polish, POLISH),
        (enable_ukrainian, UKRAINIAN),
    )
    selected = [fragment for enabled, fragment in toggles if enabled]
    if not selected:
        return None
    char_class = "".join(selected)
    return re.compile(f"[{char_class}]")
|
||
|
||
def should_ignore(text):
    """
    Tell whether ``text`` contains at least one entry of IGNORE_WORDS.

    The check is a plain substring test for each entry; returns True on the
    first hit and False when no entry occurs in ``text``.
    """
    return any(word in text for word in IGNORE_WORDS)
|
||
|
||
def find_matches_in_po(po_path, pattern):
    """
    Search the translated strings of a PO file for ``pattern``.

    Both the singular translation (``msgstr``) and every plural form
    (``msgstr_plural``) of an entry are inspected. Entries with no translation
    at all, or whose translation contains an ignored word (see should_ignore),
    are skipped.

    Args:
        po_path: PO file to load with ``polib.pofile``; it is echoed back
            unchanged as the first item of each result tuple.
        pattern: Compiled regex such as the one build_pattern() returns, or
            None to disable the search.

    Returns:
        A list of ``(po_path, entry.linenum, text)`` tuples, at most one per
        entry, where ``text`` is the first translated string of that entry
        that matched. The list is empty when ``pattern`` is falsy.
    """
    matches = []
    if not pattern:
        return matches

    for entry in polib.pofile(po_path):
        # Plural entries keep their translations in msgstr_plural and leave
        # msgstr empty, so looking at msgstr alone would treat them as
        # untranslated and never check them.
        candidates = [entry.msgstr, *entry.msgstr_plural.values()]
        texts = [text for text in candidates if text and text.strip()]

        # Skip if there is no translation at all
        if not texts:
            continue

        # One ignored word anywhere in the translation exempts the whole entry.
        if any(should_ignore(text) for text in texts):
            continue

        for text in texts:
            if pattern.search(text):
                matches.append((po_path, entry.linenum, text))
                break  # avoid multiple reports for the same entry
    return matches
|
||
|
||
def main():
    """
    Command-line entry point: scan PO files and print every match.

    Positional arguments are PO files or directories; directories are searched
    recursively for ``*.po`` files. The ``--no-russian``, ``--no-polish`` and
    ``--no-ukrainian`` flags disable individual checks; disabling all of them
    is reported through ``parser.error``.

    Matches are printed to stdout as ``<path>:<line>: <text>``. Warnings about
    paths that do not exist go to stderr so they never mix with the report.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "paths", nargs="+", help="One or more PO files or directories to search"
    )
    parser.add_argument(
        "--no-russian", action="store_true", help="Disable Russian pattern checking."
    )
    parser.add_argument(
        "--no-polish", action="store_true", help="Disable Polish pattern checking."
    )
    parser.add_argument(
        "--no-ukrainian",
        action="store_true",
        help="Disable Ukrainian pattern checking.",
    )

    args = parser.parse_args()

    pattern = build_pattern(
        enable_russian=not args.no_russian,
        enable_polish=not args.no_polish,
        enable_ukrainian=not args.no_ukrainian,
    )

    if not pattern:
        parser.error("All checks are disabled. Enable at least one language pattern.")

    paths = []
    for arg in args.paths:
        p = Path(arg)
        if p.is_dir():
            # rglob() yields files in filesystem order; sort so the report is
            # stable from one run to the next.
            paths.extend(sorted(p.rglob("*.po")))
        elif p.is_file():
            paths.append(p)
        else:
            # Diagnostics go to stderr, keeping stdout limited to matches.
            print(f"Warning: {p} not found.", file=sys.stderr)

    for path in paths:
        for po_path, linenum, text in find_matches_in_po(path, pattern):
            print(f"{po_path}:{linenum}: {text}")
|
||
|
||
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.