Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ docker run -v /Users/username/translations:/input \
```bash
python -m pytest
```
Alternatively, run the test suite inside the Docker image:

```bash
docker run --rm -v $(pwd):/app -w /app --entrypoint python python-gpt-po -m pytest -v
```

## Documentation

Expand Down
1 change: 1 addition & 0 deletions python_gpt_po/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def main():
model=model,
bulk_mode=args.bulk,
fuzzy=args.fuzzy,
fix_fuzzy=args.fix_fuzzy,
folder_language=args.folder_language
)

Expand Down
1 change: 1 addition & 0 deletions python_gpt_po/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ class TranslationConfig:
model: str
bulk_mode: bool = False
fuzzy: bool = False
fix_fuzzy: bool = False
folder_language: bool = False
82 changes: 67 additions & 15 deletions python_gpt_po/services/translation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,36 +336,38 @@ def scan_and_process_po_files(
logging.info("Discovered .po file: %s", po_file_path)

# Prepare the PO file, if it returns None then skip this file
po_file = self._prepare_po_file(po_file_path, languages)
if po_file is None:
po_file_result = self._prepare_po_file(po_file_path, languages)
if po_file_result is None:
logging.info("Skipping file %s due to language mismatch or other issues", po_file_path)
continue

# Process the file
self.process_po_file(po_file_path, languages, detail_languages)
# Process the file, passing the prepared po_file and file_lang
self.process_po_file(po_file_path, languages, detail_languages, po_file_result)

def process_po_file(
self,
po_file_path: str,
languages: List[str],
detail_languages: Optional[Dict[str, str]] = None
detail_languages: Optional[Dict[str, str]] = None,
po_file_result=None,
):
"""Processes a single .po file with translations."""
try:
po_file = self._prepare_po_file(po_file_path, languages)
if not po_file:
return
# Only prepare the po_file if not provided (for backward compatibility)
if po_file_result is None:
po_file_result = self._prepare_po_file(po_file_path, languages)
if po_file_result is None:
return

file_lang = self.po_file_handler.get_file_language(
po_file_path,
po_file,
languages,
self.config.folder_language
)
po_file, file_lang = po_file_result

# Get the detailed language name if available
detail_lang = detail_languages.get(file_lang) if detail_languages else None

if self.config.fix_fuzzy:
self.fix_fuzzy_entries(po_file, po_file_path, file_lang, detail_lang)
return

texts_to_translate = [entry.msgid for entry in po_file if not entry.msgstr.strip() and entry.msgid]
translations = self.get_translations(texts_to_translate, file_lang, po_file_path, detail_lang)

Expand All @@ -384,6 +386,9 @@ def process_po_file(
def _prepare_po_file(self, po_file_path: str, languages: List[str]):
"""Prepares the .po file for translation."""
if self.config.fuzzy:
logging.warning(
"Consider running with '--fix-fuzzy' to clean and update the fuzzy translations properly.",
)
self.po_file_handler.disable_fuzzy_translations(po_file_path)
po_file = polib.pofile(po_file_path)
file_lang = self.po_file_handler.get_file_language(
Expand All @@ -395,7 +400,7 @@ def _prepare_po_file(self, po_file_path: str, languages: List[str]):
if not file_lang:
logging.warning("Skipping .po file due to language mismatch: %s", po_file_path)
return None
return po_file
return po_file, file_lang

def get_translations(
self,
Expand Down Expand Up @@ -424,6 +429,22 @@ def _update_po_entries(
else:
self._handle_empty_translation(entry, target_language, detail_language)

def _update_fuzzy_po_entries(
    self,
    po_file,
    translations: List[str],
    entries_to_update: list
):
    """Apply translations to fuzzy entries and clear their 'fuzzy' flag.

    Entries and translations are paired by position. An entry whose
    translation is missing, ``None`` or whitespace-only is left untouched
    (still fuzzy) and a warning is logged for it.

    Args:
        po_file: The PO file object the entries belong to; passed to
            ``po_file_handler.update_po_entry`` together with the msgid.
        translations: Translated strings, expected in the same order as
            ``entries_to_update``.
        entries_to_update: The fuzzy entries to fix.
    """
    translations = list(translations or [])

    if len(translations) != len(entries_to_update):
        # zip() stops at the shorter sequence, so without this warning a
        # short response would leave entries fuzzy with no trace in the log.
        logging.warning(
            "Received %d translations for %d fuzzy entries; unmatched entries are left fuzzy.",
            len(translations),
            len(entries_to_update),
        )

    for entry, translation in zip(entries_to_update, translations):
        # Guard against None as well as empty/whitespace-only strings.
        if translation and translation.strip():
            # NOTE(review): the handler receives the msgid, not the entry
            # object - presumably it looks the entry up by msgid; confirm
            # this is unambiguous when msgids repeat under different contexts.
            self.po_file_handler.update_po_entry(po_file, entry.msgid, translation)
            if 'fuzzy' in entry.flags:
                entry.flags.remove('fuzzy')
            logging.info("Fixed fuzzy entry '%s' -> '%s'", entry.msgid, translation)
        else:
            logging.warning("Translation for fuzzy '%s' is still empty, leaving fuzzy.", entry.msgid)

def _handle_empty_translation(self, entry, target_language: str, detail_language: Optional[str] = None):
"""Handles cases where the initial translation is empty."""
logging.warning("Empty translation for '%s'. Attempting individual translation.", entry.msgid)
Expand Down Expand Up @@ -453,3 +474,34 @@ def _handle_untranslated_entries(self, po_file, target_language: str, detail_lan
)
else:
logging.error("Failed to translate '%s' after final attempt.", entry.msgid)

def fix_fuzzy_entries(
    self,
    po_file,
    po_file_path: str,
    target_language: str,
    detail_language: Optional[str] = None,
):
    """Find fuzzy entries in a PO file, re-translate them and save the file.

    Only entries that carry the 'fuzzy' flag, have a non-empty msgid and
    are not obsolete are sent for translation. If there are none, the
    method logs that and returns without saving.

    Args:
        po_file: Parsed PO file object; iterated for entries and saved
            back to ``po_file_path``.
        po_file_path: Path the PO file is saved to; also used in log output.
        target_language: Language code passed to ``get_translations``.
        detail_language: Optional detailed language name passed to
            ``get_translations``.
    """
    fuzzy_entries = [
        entry for entry in po_file
        if 'fuzzy' in entry.flags
        # Same guard as the regular translation path: an empty msgid
        # gives the translator nothing to work with.
        and entry.msgid
        # Obsolete entries are skipped so no translation request is spent
        # on them and their flags are left as they were.
        and not getattr(entry, 'obsolete', False)
    ]

    if not fuzzy_entries:
        logging.info("No fuzzy entries found in %s", po_file_path)
        return

    logging.info("Found %d fuzzy entries to fix in %s", len(fuzzy_entries), po_file_path)

    texts_to_translate = [entry.msgid for entry in fuzzy_entries]
    translations = self.get_translations(texts_to_translate, target_language, po_file_path, detail_language)

    self._update_fuzzy_po_entries(po_file, translations, entries_to_update=fuzzy_entries)

    po_file.save(po_file_path)

    # Report the msgstr values as they stand after the update, so entries
    # that could not be fixed show up with their previous text.
    self.po_file_handler.log_translation_status(
        po_file_path,
        texts_to_translate,
        [entry.msgstr for entry in fuzzy_entries]
    )

    logging.info("Fuzzy fix completed for %s", po_file_path)
Empty file.
50 changes: 50 additions & 0 deletions python_gpt_po/tests/unit/test_translation_service_fuzzy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from unittest.mock import MagicMock, patch

import polib

from python_gpt_po.models.config import TranslationConfig
from python_gpt_po.models.enums import ModelProvider
from python_gpt_po.models.provider_clients import ProviderClients
from python_gpt_po.services.translation_service import TranslationService
from python_gpt_po.tests.test_multi_provider import SAMPLE_PO_CONTENT


def test_fix_fuzzy_entries_on_sample_po_content(tmp_path):
    """fix_fuzzy_entries re-translates the fuzzy entry, clears its flag and saves."""
    # Write SAMPLE_PO_CONTENT to a real temp file
    po_file_path = tmp_path / "sample.po"
    po_file_path.write_text(SAMPLE_PO_CONTENT, encoding="utf-8")

    # Parse the file with polib
    po_file = polib.pofile(str(po_file_path))

    # Sanity check - confirm fuzzy is present
    fuzzy_entries = [entry for entry in po_file if 'fuzzy' in entry.flags]
    assert len(fuzzy_entries) == 1
    assert fuzzy_entries[0].msgid == "This is a fuzzy translation"

    # Setup dummy config
    clients = ProviderClients()
    config = TranslationConfig(
        provider_clients=clients,
        provider=ModelProvider.OPENAI,
        model="gpt-4o",
        bulk_mode=True,
        fuzzy=False,
        folder_language=False,
        fix_fuzzy=True
    )
    service = TranslationService(config=config)

    # Mock get_translations to return a valid translation
    service.get_translations = MagicMock(return_value=["Ceci est une traduction correcte"])

    # Patch save so the sample file on disk is not rewritten
    with patch.object(po_file, 'save') as mock_save:
        service.fix_fuzzy_entries(po_file, str(po_file_path), "fr")

    # Validate that exactly the fuzzy msgid was requested, for the target
    # language and file given, with no detailed language name.
    service.get_translations.assert_called_once_with(
        ["This is a fuzzy translation"],
        "fr",
        str(po_file_path),
        None,
    )

    # Validate that the entry was updated and fuzzy flag removed
    assert fuzzy_entries[0].msgstr == "Ceci est une traduction correcte"
    assert 'fuzzy' not in fuzzy_entries[0].flags

    # Validate save was called
    mock_save.assert_called_once_with(str(po_file_path))
16 changes: 11 additions & 5 deletions python_gpt_po/utils/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def parse_args():
provider_group = parser.add_argument_group('Provider Settings')
api_group = parser.add_argument_group('API Keys')
advanced_group = parser.add_argument_group('Advanced Options')
fuzzy_group = advanced_group.add_mutually_exclusive_group()

# Required arguments (not required if listing models)
required_group.add_argument(
Expand Down Expand Up @@ -135,11 +136,6 @@ def parse_args():
)

# Advanced options
advanced_group.add_argument(
"--fuzzy",
action="store_true",
help="Process fuzzy translations (remove fuzzy markers)"
)
advanced_group.add_argument(
"--bulk",
action="store_true",
Expand All @@ -152,6 +148,16 @@ def parse_args():
metavar="SIZE",
help="Number of strings to translate in each batch (default: 50)"
)
fuzzy_group.add_argument(
"--fuzzy",
action="store_true",
help="Remove fuzzy markers without translating (legacy behavior, risky)"
)
fuzzy_group.add_argument(
"--fix-fuzzy",
action="store_true",
help="Translate and clean fuzzy entries safely (recommended)"
)

# Version information
parser.add_argument(
Expand Down