diff --git a/README.md b/README.md index ef5e341..53618c5 100644 --- a/README.md +++ b/README.md @@ -179,6 +179,9 @@ docker run -v /Users/username/translations:/input \ ```bash python -m pytest ``` +```bash +docker run --rm -v $(pwd):/app -w /app --entrypoint python python-gpt-po -m pytest -v +``` ## Documentation diff --git a/python_gpt_po/main.py b/python_gpt_po/main.py index 95be454..7f45d49 100644 --- a/python_gpt_po/main.py +++ b/python_gpt_po/main.py @@ -184,6 +184,7 @@ def main(): model=model, bulk_mode=args.bulk, fuzzy=args.fuzzy, + fix_fuzzy=args.fix_fuzzy, folder_language=args.folder_language ) diff --git a/python_gpt_po/models/config.py b/python_gpt_po/models/config.py index 17f5398..9022544 100644 --- a/python_gpt_po/models/config.py +++ b/python_gpt_po/models/config.py @@ -16,4 +16,5 @@ class TranslationConfig: model: str bulk_mode: bool = False fuzzy: bool = False + fix_fuzzy: bool = False folder_language: bool = False diff --git a/python_gpt_po/services/translation_service.py b/python_gpt_po/services/translation_service.py index ab3d228..9c265f1 100644 --- a/python_gpt_po/services/translation_service.py +++ b/python_gpt_po/services/translation_service.py @@ -336,36 +336,38 @@ def scan_and_process_po_files( logging.info("Discovered .po file: %s", po_file_path) # Prepare the PO file, if it returns None then skip this file - po_file = self._prepare_po_file(po_file_path, languages) - if po_file is None: + po_file_result = self._prepare_po_file(po_file_path, languages) + if po_file_result is None: logging.info("Skipping file %s due to language mismatch or other issues", po_file_path) continue - # Process the file - self.process_po_file(po_file_path, languages, detail_languages) + # Process the file, passing the prepared po_file and file_lang + self.process_po_file(po_file_path, languages, detail_languages, po_file_result) def process_po_file( self, po_file_path: str, languages: List[str], - detail_languages: Optional[Dict[str, str]] = None + detail_languages: Optional[Dict[str, str]] = None, + po_file_result=None, ): """Processes a single .po file with translations.""" try: - po_file = self._prepare_po_file(po_file_path, languages) - if not po_file: - return + # Only prepare the po_file if not provided (for backward compatibility) + if po_file_result is None: + po_file_result = self._prepare_po_file(po_file_path, languages) + if po_file_result is None: + return - file_lang = self.po_file_handler.get_file_language( - po_file_path, - po_file, - languages, - self.config.folder_language - ) + po_file, file_lang = po_file_result # Get the detailed language name if available detail_lang = detail_languages.get(file_lang) if detail_languages else None + if self.config.fix_fuzzy: + self.fix_fuzzy_entries(po_file, po_file_path, file_lang, detail_lang) + return + texts_to_translate = [entry.msgid for entry in po_file if not entry.msgstr.strip() and entry.msgid] translations = self.get_translations(texts_to_translate, file_lang, po_file_path, detail_lang) @@ -384,6 +386,9 @@ def process_po_file( def _prepare_po_file(self, po_file_path: str, languages: List[str]): """Prepares the .po file for translation.""" if self.config.fuzzy: + logging.warning( + "Consider running with '--fix-fuzzy' to clean and update the fuzzy translations properly.", + ) self.po_file_handler.disable_fuzzy_translations(po_file_path) po_file = polib.pofile(po_file_path) file_lang = self.po_file_handler.get_file_language( @@ -395,7 +400,7 @@ def _prepare_po_file(self, po_file_path: str, languages: List[str]): if not file_lang: logging.warning("Skipping .po file due to language mismatch: %s", po_file_path) return None - return po_file + return po_file, file_lang def get_translations( self, @@ -424,6 +429,22 @@ def _update_po_entries( else: self._handle_empty_translation(entry, target_language, detail_language) + def _update_fuzzy_po_entries( + self, + po_file, + translations: List[str], + entries_to_update: list + ): + """Update only fuzzy entries, remove 'fuzzy' flag, and log cleanly.""" + for entry, translation in zip(entries_to_update, translations): + if translation.strip(): + self.po_file_handler.update_po_entry(po_file, entry.msgid, translation) + if 'fuzzy' in entry.flags: + entry.flags.remove('fuzzy') + logging.info("Fixed fuzzy entry '%s' -> '%s'", entry.msgid, translation) + else: + logging.warning("Translation for fuzzy '%s' is still empty, leaving fuzzy.", entry.msgid) + def _handle_empty_translation(self, entry, target_language: str, detail_language: Optional[str] = None): """Handles cases where the initial translation is empty.""" logging.warning("Empty translation for '%s'. Attempting individual translation.", entry.msgid) @@ -453,3 +474,34 @@ def _handle_untranslated_entries(self, po_file, target_language: str, detail_lan ) else: logging.error("Failed to translate '%s' after final attempt.", entry.msgid) + + def fix_fuzzy_entries( + self, + po_file, + po_file_path: str, + target_language: str, + detail_language: Optional[str] = None, + ): + """Find and fix fuzzy entries in a PO file using AI translation.""" + fuzzy_entries = [entry for entry in po_file if 'fuzzy' in entry.flags] + + if not fuzzy_entries: + logging.info("No fuzzy entries found in %s", po_file_path) + return + + logging.info("Found %d fuzzy entries to fix in %s", len(fuzzy_entries), po_file_path) + + texts_to_translate = [entry.msgid for entry in fuzzy_entries] + translations = self.get_translations(texts_to_translate, target_language, po_file_path, detail_language) + + self._update_fuzzy_po_entries(po_file, translations, entries_to_update=fuzzy_entries) + + po_file.save(po_file_path) + + self.po_file_handler.log_translation_status( + po_file_path, + texts_to_translate, + [entry.msgstr for entry in fuzzy_entries] + ) + + logging.info("Fuzzy fix completed for %s", po_file_path) diff --git a/python_gpt_po/tests/unit/__init__.py b/python_gpt_po/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python_gpt_po/tests/unit/test_translation_service_fuzzy.py b/python_gpt_po/tests/unit/test_translation_service_fuzzy.py new file mode 100644 index 0000000..af937ea --- /dev/null +++ b/python_gpt_po/tests/unit/test_translation_service_fuzzy.py @@ -0,0 +1,50 @@ +from unittest.mock import MagicMock, patch + +import polib + +from python_gpt_po.models.config import TranslationConfig +from python_gpt_po.models.enums import ModelProvider +from python_gpt_po.models.provider_clients import ProviderClients +from python_gpt_po.services.translation_service import TranslationService +from python_gpt_po.tests.test_multi_provider import SAMPLE_PO_CONTENT + + +def test_fix_fuzzy_entries_on_sample_po_content(tmp_path): + # Write SAMPLE_PO_CONTENT to a real temp file + po_file_path = tmp_path / "sample.po" + po_file_path.write_text(SAMPLE_PO_CONTENT, encoding="utf-8") + + # Parse the file with polib + po_file = polib.pofile(str(po_file_path)) + + # Sanity check - confirm fuzzy is present + fuzzy_entries = [entry for entry in po_file if 'fuzzy' in entry.flags] + assert len(fuzzy_entries) == 1 + assert fuzzy_entries[0].msgid == "This is a fuzzy translation" + + # Setup dummy config + clients = ProviderClients() + config = TranslationConfig( + provider_clients=clients, + provider=ModelProvider.OPENAI, + model="gpt-4o", + bulk_mode=True, + fuzzy=False, + folder_language=False, + fix_fuzzy=True + ) + service = TranslationService(config=config) + + # Mock get_translations to return a valid translation + service.get_translations = MagicMock(return_value=["Ceci est une traduction correcte"]) + + # Patch save to avoid actual file I/O + with patch.object(po_file, 'save') as mock_save: + service.fix_fuzzy_entries(po_file, str(po_file_path), "fr") + + # Validate that the entry was updated and fuzzy flag removed + assert fuzzy_entries[0].msgstr == "Ceci est une traduction correcte" + assert 'fuzzy' not in fuzzy_entries[0].flags + + # Validate save was called + mock_save.assert_called_once_with(str(po_file_path)) diff --git a/python_gpt_po/utils/cli.py b/python_gpt_po/utils/cli.py index 0e145d8..e105afd 100644 --- a/python_gpt_po/utils/cli.py +++ b/python_gpt_po/utils/cli.py @@ -68,6 +68,7 @@ def parse_args(): provider_group = parser.add_argument_group('Provider Settings') api_group = parser.add_argument_group('API Keys') advanced_group = parser.add_argument_group('Advanced Options') + fuzzy_group = advanced_group.add_mutually_exclusive_group() # Required arguments (not required if listing models) required_group.add_argument( @@ -135,11 +136,6 @@ def parse_args(): ) # Advanced options - advanced_group.add_argument( - "--fuzzy", - action="store_true", - help="Process fuzzy translations (remove fuzzy markers)" - ) advanced_group.add_argument( "--bulk", action="store_true", @@ -152,6 +148,16 @@ def parse_args(): metavar="SIZE", help="Number of strings to translate in each batch (default: 50)" ) + fuzzy_group.add_argument( + "--fuzzy", + action="store_true", + help="Remove fuzzy markers without translating (legacy behavior, risky)" + ) + fuzzy_group.add_argument( + "--fix-fuzzy", + action="store_true", + help="Translate and clean fuzzy entries safely (recommended)" + ) # Version information parser.add_argument(