diff --git a/.gitignore b/.gitignore index 90e7d35..62eadbf 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__ build/ db.sqlite3 .venv -python_gpt_po/version.py +python_gpt_po/_version.py +CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index 71532e5..0000000 --- a/CLAUDE.md +++ /dev/null @@ -1,35 +0,0 @@ -# Development Guidelines for python-gpt-po - -## Commands - -- Install: `pip install -e .` -- Run: `python -m python_gpt_po.main --folder --lang ` -- Test: `python -m pytest` -- Test Single: `python -m pytest python_gpt_po/tests/path/to/test.py::test_function_name -v` -- Test Integration: `python -m pytest -m integration` -- Lint: `flake8` -- Type Check: `pylint python_gpt_po/` - -## Docker Commands - -- Build Image: `docker build -t gpt-po-translator .` -- Pull Image: `docker pull ghcr.io/pescheckit/python-gpt-po:latest` -- Run with Tag: `docker run -v $(pwd):/data ghcr.io/pescheckit/python-gpt-po:latest --folder /data --lang fr` -- Run Container: - - Current Dir: `docker run -v $(pwd):/data -e OPENAI_API_KEY= gpt-po-translator --folder /data --lang ` - - Absolute Path: `docker run -v /absolute/path/to/files:/custom/path -e OPENAI_API_KEY= gpt-po-translator --folder /custom/path --lang ` - - Windows Path: `docker run -v D:/projects/locales:/locales -e OPENAI_API_KEY= gpt-po-translator --folder /locales --lang ` - - Multiple Volumes: `docker run -v /source:/input -v /output:/output -e OPENAI_API_KEY= gpt-po-translator --folder /input --lang ` - -## Code Style - -- Line Length: 120 characters max -- Docstrings: Required for all modules, classes, and functions (Google style) -- Imports: Group standard lib, third-party, and local imports (sorted alphabetically) -- Typing: Use type hints for all function parameters and return values -- Naming: snake_case for variables/functions, PascalCase for classes, UPPER_CASE for constants -- Error Handling: Specific exceptions with descriptive messages -- Logging: Use the logging module, not print statements -- Tests: Unit tests required with descriptive names, mocks for external services -- Use dataclasses for configuration objects -- Follow PEP 8 with the exceptions noted in .flake8 and .pylintrc \ No newline at end of file diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index f1af495..6e0c7a3 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -3,7 +3,7 @@ set -e # Display help information if no arguments are provided if [ $# -eq 0 ]; then - VERSION=$(gpt-po-translator --version | cut -d' ' -f2) + VERSION=$(python -m python_gpt_po.main --version) echo "GPT PO Translator Docker Container v$VERSION" echo "===========================================" echo @@ -33,15 +33,15 @@ fi # Check if we need to display version if [ "$1" = "--version" ]; then - gpt-po-translator --version + python -m python_gpt_po.main --version exit 0 fi # Check if we need to display help if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then - gpt-po-translator --help + python -m python_gpt_po.main --help exit 0 fi # Execute command with args -exec gpt-po-translator "$@" \ No newline at end of file +exec python -m python_gpt_po.main "$@" diff --git a/pyproject.toml b/pyproject.toml index f8161f6..f6a088a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,17 @@ [build-system] -requires = ["setuptools>=42", "wheel", "setuptools_scm[toml]==8.1.0"] +requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=6.2"] build-backend = "setuptools.build_meta" +[tool.setuptools] +# Use find with namespaces=True to handle nested packages +packages = { find = { exclude = ["*.__pycache__", "*.__pycache__.*"], namespaces = true } } + [tool.setuptools_scm] fallback_version = "0.1.0" write_to = "python_gpt_po/_version.py" [project] -name = "gpt_po_translator" +name = "gpt-po-translator" dynamic = ["version"] authors = [ {name = "Bram Mittendorff", email = "bram@pescheck.io"}, @@ -36,7 +40,6 @@ classifiers = [ "Topic :: Software Development :: Localization", "Topic :: Text Processing :: Linguistic", "Operating System :: OS Independent", - "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -57,4 +60,4 @@ gpt-po-translator = "python_gpt_po.main:main" max-line-length = 120 [tool.isort] -line_length = 120 \ No newline at end of file +line_length = 120 diff --git a/python_gpt_po/__init__.py b/python_gpt_po/__init__.py index 35f619e..0d37a4a 100644 --- a/python_gpt_po/__init__.py +++ b/python_gpt_po/__init__.py @@ -4,7 +4,52 @@ with support for multiple AI providers including OpenAI and Anthropic. """ -try: - from ._version import version as __version__ -except ImportError: - __version__ = "0.1.0" +import os +import subprocess +from typing import Optional + + +def _get_version_from_git() -> Optional[str]: + """ + Try to get version from git. + + Returns: + Optional[str]: Git version or None if not available + """ + try: + # Check if we're in a git repo + is_git_repo = subprocess.run( + ["git", "rev-parse", "--is-inside-work-tree"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False + ).returncode == 0 + if is_git_repo: + # Get version from git describe + return subprocess.check_output( + ["git", "describe", "--tags"], + stderr=subprocess.STDOUT, + text=True + ).strip() + except (subprocess.SubprocessError, FileNotFoundError): + pass + return None + + +# Version priority: +# 1. Environment variable PACKAGE_VERSION (for Docker/CI environments) +# 2. _version.py from setuptools_scm (for installed packages) +# 3. Git describe (for development environments) +# 4. Fallback to "0.1.0" +if 'PACKAGE_VERSION' in os.environ: + __version__ = os.environ.get('PACKAGE_VERSION') +else: + try: + from ._version import version as __version__ # noqa + except ImportError: + git_version = _get_version_from_git() + if git_version: + __version__ = git_version + else: + __version__ = "0.1.0" diff --git a/python_gpt_po/main.py b/python_gpt_po/main.py index be592bc..95be454 100644 --- a/python_gpt_po/main.py +++ b/python_gpt_po/main.py @@ -33,13 +33,13 @@ def setup_logging(): def initialize_provider(args) -> tuple[ProviderClients, ModelProvider, str]: """ Initialize the provider client and determine the appropriate model. - + Args: args: Command line arguments from argparse - + Returns: tuple: (provider_clients, provider, model) - + Raises: SystemExit: If no valid provider can be found or initialized """ @@ -87,13 +87,13 @@ def get_appropriate_model( ) -> str: """ Get the appropriate model for the provider. - + Args: provider (ModelProvider): The selected provider provider_clients (ProviderClients): The initialized provider clients model_manager (ModelManager): The model manager instance requested_model (Optional[str]): Model requested by the user - + Returns: str: The appropriate model ID """ @@ -125,7 +125,7 @@ def process_translations(config: TranslationConfig, folder: str, batch_size: int): """ Process translations for the given languages and directory. - + Args: config (TranslationConfig): The translation configuration folder (str): Directory containing .po files diff --git a/python_gpt_po/models/provider_clients.py b/python_gpt_po/models/provider_clients.py index b4784f9..11e9cfe 100644 --- a/python_gpt_po/models/provider_clients.py +++ b/python_gpt_po/models/provider_clients.py @@ -19,7 +19,7 @@ def __init__(self): def initialize_clients(self, api_keys: Dict[str, str]): """Initialize API clients for all providers with available keys. - + Args: api_keys (Dict[str, str]): Dictionary of provider names to API keys """ diff --git a/python_gpt_po/services/model_manager.py b/python_gpt_po/services/model_manager.py index 0042528..3dd8151 100644 --- a/python_gpt_po/services/model_manager.py +++ b/python_gpt_po/services/model_manager.py @@ -81,12 +81,12 @@ def validate_model(provider_clients: ProviderClients, provider: ModelProvider, m """ Validates whether the specified model is available for the given provider. Uses prefix matching so that a shorthand (e.g. "claude") will match a full model name. - + Args: provider_clients (ProviderClients): The initialized provider clients provider (ModelProvider): The provider to check against model (str): The model name/ID to validate - + Returns: bool: True if the model is valid, False otherwise """ @@ -99,10 +99,10 @@ def validate_model(provider_clients: ProviderClients, provider: ModelProvider, m def get_default_model(provider: ModelProvider) -> str: """ Returns the default model for a given provider. - + Args: provider (ModelProvider): The provider to get the default model for - + Returns: str: The default model ID """ @@ -128,7 +128,7 @@ def verify_model_capabilities( provider (ModelProvider): The provider to check against model (str): The model to verify required_capability (str): The capability to check for - + Returns: bool: True if the model has the required capability, False otherwise """ @@ -147,12 +147,12 @@ def suggest_model(provider_clients: ProviderClients, provider: ModelProvider, task: str = "translation") -> str: """ Suggests the best model for a given task and provider. - + Args: provider_clients (ProviderClients): The initialized provider clients provider (ModelProvider): The provider to use task (str): The task the model will be used for - + Returns: str: The suggested model ID """ diff --git a/python_gpt_po/services/po_file_handler.py b/python_gpt_po/services/po_file_handler.py index 0cf27b7..878841e 100644 --- a/python_gpt_po/services/po_file_handler.py +++ b/python_gpt_po/services/po_file_handler.py @@ -48,13 +48,13 @@ def disable_fuzzy_translations(po_file_path): @staticmethod def get_file_language(po_file_path, po_file, languages, folder_language): """Determines the language for a .po file. - + Args: po_file_path (str): Path to the .po file po_file (polib.POFile): Loaded PO file object languages (List[str]): List of valid language codes folder_language (bool): Whether to infer language from folder structure - + Returns: str or None: The normalized language code or None if not found """ @@ -76,10 +76,10 @@ def get_file_language(po_file_path, po_file, languages, folder_language): @staticmethod def normalize_language_code(lang): """Convert language name or code to ISO 639-1 code. - + Args: lang (str): Language name or code to normalize - + Returns: str or None: The normalized ISO 639-1 language code or None if not found """ @@ -109,7 +109,7 @@ def normalize_language_code(lang): @staticmethod def log_translation_status(po_file_path, original_texts, translations): """Logs the status of translations for a .po file. - + Args: po_file_path (str): Path to the .po file original_texts (List[str]): List of original texts to translate @@ -133,7 +133,7 @@ def log_translation_status(po_file_path, original_texts, translations): @staticmethod def update_po_entry(po_file, original_text, translated_text): """Updates a .po file entry with the translated text. - + Args: po_file (polib.POFile): The PO file object original_text (str): The original text to find @@ -149,10 +149,10 @@ def update_po_entry(po_file, original_text, translated_text): @staticmethod def read_po_file(po_file_path): """Reads a .po file and returns the PO file object. - + Args: po_file_path (str): Path to the .po file - + Returns: polib.POFile: The loaded PO file object """ @@ -165,11 +165,11 @@ def read_po_file(po_file_path): @staticmethod def save_po_file(po_file, po_file_path): """Saves changes to a .po file. - + Args: po_file (polib.POFile): The PO file object to save po_file_path (str): Path where the file should be saved - + Returns: bool: True if successful, False otherwise """ @@ -184,10 +184,10 @@ def save_po_file(po_file, po_file_path): @staticmethod def get_untranslated_entries(po_file): """Gets all untranslated entries from a PO file. - + Args: po_file (polib.POFile): The PO file object - + Returns: List[polib.POEntry]: List of untranslated entries """ @@ -196,10 +196,10 @@ def get_untranslated_entries(po_file): @staticmethod def extract_metadata(po_file): """Extracts and returns metadata from a PO file. - + Args: po_file (polib.POFile): The PO file object - + Returns: dict: Dictionary containing metadata """ diff --git a/python_gpt_po/services/translation_service.py b/python_gpt_po/services/translation_service.py index 483c04d..ab3d228 100644 --- a/python_gpt_po/services/translation_service.py +++ b/python_gpt_po/services/translation_service.py @@ -24,7 +24,7 @@ class TranslationService: def __init__(self, config: TranslationConfig, batch_size: int = 40): """Initialize the translation service. - + Args: config (TranslationConfig): Configuration for the translation service batch_size (int): Size of batches for bulk translation diff --git a/python_gpt_po/tests/integration/test_real_po_files.py b/python_gpt_po/tests/integration/test_real_po_files.py index a5bd176..f2de07c 100644 --- a/python_gpt_po/tests/integration/test_real_po_files.py +++ b/python_gpt_po/tests/integration/test_real_po_files.py @@ -37,7 +37,7 @@ def download_real_po_files(): """Download real PO files for testing.""" # Create directory if it doesn't exist TEST_DATA_DIR.mkdir(exist_ok=True) - + # Download each PO file for name, url in REAL_PO_FILES.items(): file_path = TEST_DATA_DIR / f"{name}.po" @@ -48,9 +48,9 @@ def download_real_po_files(): logging.info(f"Downloaded {name} PO file to {file_path}") except Exception as e: logging.error(f"Failed to download {name} PO file: {e}") - + yield - + # Cleanup is optional - keep files for inspection if needed # If you want to clean up, uncomment the following: # for file_path in TEST_DATA_DIR.glob("*.po"): @@ -65,24 +65,24 @@ def modified_po_file(): source_path = TEST_DATA_DIR / "django_admin.po" if not source_path.exists(): pytest.skip("Django admin PO file not available") - + # Create a modified version with some translations removed with tempfile.NamedTemporaryFile(suffix=".po", delete=False) as temp_file: temp_path = temp_file.name - + with open(source_path, "r", encoding="utf-8") as source_file: content = source_file.read() - + # Remove some translations (replace with empty msgstr) modified_content = content.replace('msgstr "Administration"', 'msgstr ""') modified_content = modified_content.replace('msgstr "Authentification"', 'msgstr ""') modified_content = modified_content.replace('msgstr "Changement"', 'msgstr ""') - + with open(temp_path, "w", encoding="utf-8") as modified_file: modified_file.write(modified_content) - + yield temp_path - + # Clean up os.unlink(temp_path) @@ -147,15 +147,15 @@ def test_translation_from_real_po_file(translation_service_openai, modified_po_f # Skip if file doesn't exist if not os.path.exists(modified_po_file): pytest.skip("Modified PO file not available") - + # Mock the OpenAI client to return fixed translations translation_service_openai.config.provider_clients.openai_client.chat.completions.create.return_value = MagicMock( choices=[MagicMock(message=MagicMock(content='["Administration", "Authentification", "Changement"]'))] ) - + # Mock get_file_language to return French translation_service_openai.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Perform the translation with patch('polib.pofile') as mock_pofile: # Create mock entries for the removed translations @@ -169,15 +169,15 @@ def test_translation_from_real_po_file(translation_service_openai, modified_po_f entry.msgid = text entry.msgstr = translation mock_entries.append(entry) - + mock_po = MagicMock() mock_po.__iter__.return_value = mock_entries mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Process the file translation_service_openai.process_po_file(modified_po_file, ["fr"]) - + # Check that translations were applied assert mock_po.save.called @@ -189,18 +189,18 @@ def test_translation_large_real_po_file_with_batching(translation_service_anthro source_path = TEST_DATA_DIR / "wordpress.po" if not os.path.exists(source_path): pytest.skip("WordPress PO file not available") - + # Set a small batch size to test batching translation_service_anthropic.batch_size = 5 - + # Mock Anthropic client responses for each batch translation_service_anthropic.translate_bulk = MagicMock(return_value=[ "Traduction 1", "Traduction 2", "Traduction 3", "Traduction 4", "Traduction 5" ]) - + # Mock get_file_language to return French translation_service_anthropic.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Setup a simplified mock PO file with multiple entries with patch('polib.pofile') as mock_pofile: mock_entries = [] @@ -209,18 +209,18 @@ def test_translation_large_real_po_file_with_batching(translation_service_anthro entry.msgid = f"String {i}" entry.msgstr = "" # Empty translation mock_entries.append(entry) - + mock_po = MagicMock() mock_po.__iter__.return_value = mock_entries mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Process the file translation_service_anthropic.process_po_file(str(source_path), ["fr"]) - + # Check that batching was done (translate_bulk should be called) assert translation_service_anthropic.translate_bulk.called - + # With 15 entries and batch size 5, we expect 3 calls # But our implementation might optimize this based on which entries need translation # So we just check it was called at least once @@ -233,7 +233,7 @@ def test_real_po_file_fuzzy_handling(translation_service_deepseek): # Create a temporary PO file with fuzzy translations with tempfile.NamedTemporaryFile(suffix=".po", delete=False) as temp_file: temp_path = temp_file.name - + fuzzy_content = """ msgid "" msgstr "" @@ -256,32 +256,32 @@ def test_real_po_file_fuzzy_handling(translation_service_deepseek): msgid "Another fuzzy translation" msgstr "Une autre traduction floue" """ - + with open(temp_path, "w", encoding="utf-8") as f: f.write(fuzzy_content) - + # Enable fuzzy flag translation_service_deepseek.config.fuzzy = True - + # Mock _prepare_po_file to verify fuzzy handling original_prepare = translation_service_deepseek._prepare_po_file - + def mock_prepare(*args, **kwargs): # Call original but add spy to POFileHandler.disable_fuzzy_translations with patch.object(POFileHandler, 'disable_fuzzy_translations') as mock_disable: result = original_prepare(*args, **kwargs) assert mock_disable.called return result - + translation_service_deepseek._prepare_po_file = mock_prepare - + # Mock other necessary methods translation_service_deepseek.po_file_handler.get_file_language = MagicMock(return_value="fr") translation_service_deepseek.get_translations = MagicMock(return_value=["Ceci est une traduction normale"]) - + # Process the file translation_service_deepseek.process_po_file(temp_path, ["fr"]) - + # Clean up os.unlink(temp_path) @@ -291,7 +291,7 @@ def test_folder_language_detection(translation_service_openai): """Test detecting languages from folder structure.""" # Enable folder language detection translation_service_openai.config.folder_language = True - + # Create a mock directory structure with tempfile.TemporaryDirectory() as tmp_dir: # Create language directories @@ -299,11 +299,11 @@ def test_folder_language_detection(translation_service_openai): es_dir = os.path.join(tmp_dir, "es") os.makedirs(fr_dir, exist_ok=True) os.makedirs(es_dir, exist_ok=True) - + # Create PO files in each directory fr_po_path = os.path.join(fr_dir, "messages.po") es_po_path = os.path.join(es_dir, "messages.po") - + # Basic PO file content po_content = """ msgid "" @@ -320,31 +320,31 @@ def test_folder_language_detection(translation_service_openai): msgid "World" msgstr "" """ - + with open(fr_po_path, "w", encoding="utf-8") as f: f.write(po_content) - + with open(es_po_path, "w", encoding="utf-8") as f: f.write(po_content) - + # Mock methods to avoid actual API calls translation_service_openai.translate_bulk = MagicMock(return_value=["Bonjour", "Monde"]) - + # Create a real POFileHandler for this test original_handler = translation_service_openai.po_file_handler translation_service_openai.po_file_handler = POFileHandler() - + try: # Scan and process the directory translation_service_openai.scan_and_process_po_files(tmp_dir, ["fr", "es"]) - + # Verify the files were processed with open(fr_po_path, "r", encoding="utf-8") as f: f.read() - + # Should detect fr directory and process the file assert translation_service_openai.translate_bulk.call_count >= 1 - + finally: # Restore original handler translation_service_openai.po_file_handler = original_handler @@ -356,7 +356,7 @@ def test_detail_language_usage(translation_service_openai): # Create a temporary PO file with tempfile.NamedTemporaryFile(suffix=".po", delete=False) as temp_file: temp_path = temp_file.name - + po_content = """ msgid "" msgstr "" @@ -373,16 +373,16 @@ def test_detail_language_usage(translation_service_openai): msgid "Thank you" msgstr "" """ - + with open(temp_path, "w", encoding="utf-8") as f: f.write(po_content) - + # Mock get_file_language to return French translation_service_openai.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Create a custom mock for translate_bulk that captures both args and kwargs original_translate_bulk = translation_service_openai.translate_bulk - + # Create entries for the mock PO file with patch('polib.pofile') as mock_pofile: mock_entries = [] @@ -391,40 +391,40 @@ def test_detail_language_usage(translation_service_openai): entry.msgid = text entry.msgstr = "" mock_entries.append(entry) - + mock_po = MagicMock() mock_po.__iter__.return_value = mock_entries mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Mock get_translations to directly call translate_bulk (our real focus) translation_service_openai.get_translations - + # Create a function that will track the calls to translate_bulk detail_language_was_passed = [False] # Use a list to make it mutable in the nested function - + def mock_translate_bulk(texts, target_language, po_file_path, detail_language=None): if detail_language == "French": detail_language_was_passed[0] = True return ["Bonjour", "Merci"] - + # Replace the method translation_service_openai.translate_bulk = mock_translate_bulk - + try: # Create detail language mapping detail_langs_dict = {"fr": "French"} - + # Process the file - this should end up calling our mocked translate_bulk translation_service_openai.process_po_file(temp_path, ["fr"], detail_langs_dict) - + # Check if our flag was set assert detail_language_was_passed[0], "Detail language 'French' was not passed to translate_bulk" - + finally: # Restore original methods translation_service_openai.translate_bulk = original_translate_bulk - + # Clean up os.unlink(temp_path) @@ -438,38 +438,38 @@ def test_real_po_file_with_multiple_providers( source_path = TEST_DATA_DIR / "django_admin.po" if not os.path.exists(source_path): pytest.skip("Django admin PO file not available") - + # Create a copy for each provider with tempfile.TemporaryDirectory() as tmp_dir: openai_path = os.path.join(tmp_dir, "openai.po") anthropic_path = os.path.join(tmp_dir, "anthropic.po") deepseek_path = os.path.join(tmp_dir, "deepseek.po") - + # Copy the source file to each test file with open(source_path, "r", encoding="utf-8") as src: content = src.read() - + # Remove a few translations to test test_content = content test_content = test_content.replace('msgstr "Oui"', 'msgstr ""') test_content = test_content.replace('msgstr "Non"', 'msgstr ""') - + for path in [openai_path, anthropic_path, deepseek_path]: with open(path, "w", encoding="utf-8") as dest: dest.write(test_content) - + # Setup mocks for each provider for service, path, translation in [ - (translation_service_openai, openai_path, ["Oui", "Non"]), + (translation_service_openai, openai_path, ["Oui", "Non"]), (translation_service_anthropic, anthropic_path, ["Oui", "Non"]), (translation_service_deepseek, deepseek_path, ["Oui", "Non"]) ]: # Mock translate_bulk service.translate_bulk = MagicMock(return_value=translation) - + # Mock get_file_language service.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Setup simplified POFile for consistency with patch('polib.pofile') as mock_pofile: # Create mock entries @@ -479,15 +479,15 @@ def test_real_po_file_with_multiple_providers( entry.msgid = text entry.msgstr = trans mock_entries.append(entry) - + mock_po = MagicMock() mock_po.__iter__.return_value = mock_entries mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Process the file service.process_po_file(path, ["fr"]) - + # Check translations were processed assert service.translate_bulk.called @@ -496,20 +496,20 @@ def test_real_po_file_with_multiple_providers( def test_handling_diverse_po_formats(): """Test handling diverse PO file formats from different projects.""" po_files = [] - + # Check which real PO files were downloaded for name in REAL_PO_FILES: file_path = TEST_DATA_DIR / f"{name}.po" if os.path.exists(file_path): po_files.append((name, file_path)) - + if not po_files: pytest.skip("No real PO files available") - + # Create a mock provider client that we'll use to test PO file handling clients = ProviderClients() clients.openai_client = MagicMock() - + # Create a test translation service config = TranslationConfig( provider_clients=clients, @@ -519,29 +519,29 @@ def test_handling_diverse_po_formats(): fuzzy=False, folder_language=False ) - + service = TranslationService(config=config) - + # Mock translation method to avoid API calls service.translate_single = MagicMock(return_value="Translated text") - + # Test each PO file with minimal mocking for name, file_path in po_files: try: # Try to load the real PO file using polib po_file = polib.pofile(file_path) - + # Mock get_file_language to return French service.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Use minimal patching with patch.object(polib, 'pofile', return_value=po_file): # Process the file with minimal patching service.process_po_file(str(file_path), ["fr"]) - + # Success log logging.info(f"Successfully processed {name} PO file") - + except Exception as e: # Log error but don't fail the test logging.error(f"Error processing {name} PO file: {str(e)}") diff --git a/python_gpt_po/tests/test_multi_provider.py b/python_gpt_po/tests/test_multi_provider.py index c45607d..ec18adb 100644 --- a/python_gpt_po/tests/test_multi_provider.py +++ b/python_gpt_po/tests/test_multi_provider.py @@ -192,16 +192,16 @@ def test_get_openai_models(mock_get, mock_provider_clients): mock_response.json.return_value = OPENAI_MODELS_RESPONSE mock_response.raise_for_status = MagicMock() mock_get.return_value = mock_response - + # Mock the OpenAI client's models.list method models_list_mock = MagicMock() models_list_mock.data = [MagicMock(id="gpt-4"), MagicMock(id="gpt-3.5-turbo")] mock_provider_clients.openai_client.models.list.return_value = models_list_mock - + # Call the function model_manager = ModelManager() models = model_manager.get_available_models(mock_provider_clients, ModelProvider.OPENAI) - + # Assert models are returned correctly assert "gpt-4" in models @@ -216,11 +216,11 @@ def test_get_anthropic_models(mock_provider_clients): json=ANTHROPIC_MODELS_RESPONSE, status=200 ) - + # Call the function model_manager = ModelManager() models = model_manager.get_available_models(mock_provider_clients, ModelProvider.ANTHROPIC) - + # Assert models are returned correctly assert "claude-3-7-sonnet-20250219" in models assert "claude-3-5-sonnet-20241022" in models @@ -236,11 +236,11 @@ def test_get_deepseek_models(mock_provider_clients): json=DEEPSEEK_MODELS_RESPONSE, status=200 ) - + # Call the function model_manager = ModelManager() models = model_manager.get_available_models(mock_provider_clients, ModelProvider.DEEPSEEK) - + # Assert models are returned correctly assert "deepseek-chat" in models assert "deepseek-coder" in models @@ -253,15 +253,15 @@ def test_translate_bulk_openai(mock_post, translation_service_openai): mock_response = MagicMock() mock_response.json.return_value = OPENAI_TRANSLATION_RESPONSE mock_post.return_value = mock_response - + # Call function translation_service_openai.config.provider_clients.openai_client.chat.completions.create.return_value = MagicMock( choices=[MagicMock(message=MagicMock(content='["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"]'))] ) - + texts = ["Hello", "World", "Welcome to our application", "Goodbye"] translations = translation_service_openai.translate_bulk(texts, "fr", "test.po") - + # Assert translations are correct assert translations == ["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"] @@ -273,10 +273,10 @@ def test_translate_bulk_anthropic(mock_post, translation_service_anthropic): translation_service_anthropic.config.provider_clients.anthropic_client.messages.create.return_value = MagicMock( content=[MagicMock(text='["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"]')] ) - + texts = ["Hello", "World", "Welcome to our application", "Goodbye"] translations = translation_service_anthropic.translate_bulk(texts, "fr", "test.po") - + # Assert translations are correct assert translations == ["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"] @@ -291,18 +291,18 @@ def test_translate_bulk_deepseek(translation_service_deepseek): json=DEEPSEEK_TRANSLATION_RESPONSE, status=200 ) - + texts = ["Hello", "World", "Welcome to our application", "Goodbye"] - + # Test with the markdown-wrapped response with patch('requests.post') as mock_post: mock_response = MagicMock() mock_response.json.return_value = DEEPSEEK_TRANSLATION_RESPONSE mock_response.raise_for_status = MagicMock() mock_post.return_value = mock_response - + translations = translation_service_deepseek.translate_bulk(texts, "fr", "test.po") - + # Assert translations are correct after markdown cleaning assert translations == ["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"] @@ -313,12 +313,12 @@ def test_clean_json_response(translation_service_deepseek): markdown_json = "```json\n[\"Bonjour\", \"Monde\"]\n```" cleaned = translation_service_deepseek._clean_json_response(markdown_json) assert cleaned == "[\"Bonjour\", \"Monde\"]" - + # Test with extra text before and after messy_json = "Here's the translation: [\"Bonjour\", \"Monde\"] Hope that helps!" cleaned = translation_service_deepseek._clean_json_response(messy_json) assert cleaned == "[\"Bonjour\", \"Monde\"]" - + # Test with clean JSON clean_json = "[\"Bonjour\", \"Monde\"]" cleaned = translation_service_deepseek._clean_json_response(clean_json) @@ -326,25 +326,25 @@ def test_clean_json_response(translation_service_deepseek): @patch('polib.pofile') -def test_process_po_file_all_providers(mock_pofile, translation_service_openai, - translation_service_anthropic, +def test_process_po_file_all_providers(mock_pofile, translation_service_openai, + translation_service_anthropic, translation_service_deepseek, temp_po_file): """Test processing a PO file with all providers.""" # Create a mock PO file mock_po = MagicMock() mock_entries = [] - + # Create entries for the mock PO file for text in ["Hello", "World", "Welcome to our application", "Goodbye"]: entry = MagicMock() entry.msgid = text entry.msgstr = "" mock_entries.append(entry) - + mock_po.__iter__.return_value = mock_entries mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Setup translation method mocks for each service for i, service in enumerate([translation_service_openai, translation_service_anthropic, translation_service_deepseek]): # Create a fresh mock for each service @@ -352,15 +352,15 @@ def test_process_po_file_all_providers(mock_pofile, translation_service_openai, mock_po_new.__iter__.return_value = mock_entries mock_po_new.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po_new - + service.get_translations = MagicMock(return_value=[ "Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir" ]) service.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Process the PO file service.process_po_file(temp_po_file, ["fr"]) - + # Assert translations were applied service.get_translations.assert_called_once() mock_po_new.save.assert_called_once() @@ -370,19 +370,19 @@ def test_fuzzy_flag_handling(mock_disable_fuzzy, translation_service_openai, tem """Test handling of fuzzy translations.""" # Enable fuzzy flag translation_service_openai.config.fuzzy = True - + # Mock the PO file handling with patch('polib.pofile') as mock_pofile: mock_po = MagicMock() mock_po.metadata = {"Language": "fr"} mock_pofile.return_value = mock_po - + # Mock get_file_language to return a valid language translation_service_openai.po_file_handler.get_file_language = MagicMock(return_value="fr") - + # Process the PO file translation_service_openai.process_po_file(temp_po_file, ["fr"]) - + # Assert the fuzzy translations were disabled mock_disable_fuzzy.assert_called_once_with(temp_po_file) @@ -393,16 +393,16 @@ def test_validation_model_connection_all_providers( """Test validating connection to all providers.""" # Configure OpenAI mock translation_service_openai.config.provider_clients.openai_client.chat.completions.create.return_value = MagicMock() - + # Configure Anthropic mock translation_service_anthropic.config.provider_clients.anthropic_client.messages.create.return_value = MagicMock() - + # Configure DeepSeek mock with patch('requests.post') as mock_post: mock_response = MagicMock() mock_response.raise_for_status = MagicMock() mock_post.return_value = mock_response - + # Test all providers assert translation_service_openai.validate_provider_connection() is True assert translation_service_anthropic.validate_provider_connection() is True @@ -417,35 +417,35 @@ def test_scan_and_process_po_files(mock_pofile, mock_walk, translation_service_o mock_walk.return_value = [ ("/test/folder", [], ["en.po", "fr.po", "es.po", "not_a_po_file.txt"]) ] - + # Create a completely mock implementation of process_po_file to avoid any real processing translation_service_openai.process_po_file = MagicMock() - + # Create a custom implementation of scan_and_process_po_files that only processes fr.po and es.po original_scan = translation_service_openai.scan_and_process_po_files - + def mock_scan(input_folder, languages, detail_languages=None): # Only process fr.po and es.po for file_name in ["fr.po", "es.po"]: file_path = f"/test/folder/{file_name}" translation_service_openai.process_po_file(file_path, languages, detail_languages) - + # Replace the original method with our mock translation_service_openai.scan_and_process_po_files = mock_scan - + try: # Call the function translation_service_openai.scan_and_process_po_files("/test/folder", ["fr", "es"]) - + # Check that process_po_file was called exactly twice assert translation_service_openai.process_po_file.call_count == 2 - + # Check that it was called with the correct file paths calls = [args[0][0] for args in translation_service_openai.process_po_file.call_args_list] assert "/test/folder/fr.po" in calls assert "/test/folder/es.po" in calls assert "/test/folder/en.po" not in calls - + finally: # Restore original method translation_service_openai.scan_and_process_po_files = original_scan @@ -454,17 +454,17 @@ def mock_scan(input_folder, languages, detail_languages=None): def test_normalize_language_code(): """Test language code normalization.""" handler = POFileHandler() - + # Test normalizing two-letter codes assert handler.normalize_language_code("fr") == "fr" assert handler.normalize_language_code("es") == "es" - + # Test normalizing language names assert handler.normalize_language_code("French") == "fr" assert handler.normalize_language_code("Spanish") == "es" - + # Test with invalid language assert handler.normalize_language_code("InvalidLanguage") is None - + # Test with empty input assert handler.normalize_language_code("") is None diff --git a/python_gpt_po/tests/test_po_translator.py b/python_gpt_po/tests/test_po_translator.py index 365b325..c8fbc15 100644 --- a/python_gpt_po/tests/test_po_translator.py +++ b/python_gpt_po/tests/test_po_translator.py @@ -34,7 +34,7 @@ def fixture_translation_config(mock_openai_client): """ provider_clients = ProviderClients() provider_clients.openai_client = mock_openai_client - + model = "gpt-3.5-turbo" return TranslationConfig( provider_clients=provider_clients, @@ -104,7 +104,7 @@ def test_process_po_file(mock_po_file_handler_class, translation_service, tmp_pa # We need to mock the _prepare_po_file method to use our mock original_prepare = translation_service._prepare_po_file - + def mock_prepare(po_file_path, languages): if translation_service.config.fuzzy: translation_service.po_file_handler.disable_fuzzy_translations(po_file_path) @@ -112,16 +112,16 @@ def mock_prepare(po_file_path, languages): mock_po.__iter__.return_value = [] mock_po.metadata = {"Language": "es"} return mock_po - + translation_service._prepare_po_file = mock_prepare - + # Mock get_translations to avoid actual API calls translation_service.get_translations = MagicMock(return_value=[]) - + try: # Process the .po file translation_service.process_po_file(str(po_file_path), ['es']) - + # No assertions needed here - we just want to make sure it runs without errors finally: # Restore original method @@ -165,7 +165,7 @@ def test_translate_single(translation_service): } mock_response.raise_for_status = MagicMock() mock_post.return_value = mock_response - + translated_text = translation_service.translate_single(text_to_translate, 'es') assert translated_text == "Salud" return diff --git a/python_gpt_po/utils/cli.py b/python_gpt_po/utils/cli.py index 39cab1d..11b0591 100644 --- a/python_gpt_po/utils/cli.py +++ b/python_gpt_po/utils/cli.py @@ -20,7 +20,7 @@ class CustomArgumentParser(argparse.ArgumentParser): def error(self, message): """ Display a cleaner error message with usage information. - + Args: message (str): Error message """ @@ -32,7 +32,7 @@ def error(self, message): def parse_args(): """ Parse command-line arguments with a more user-friendly interface. - + Returns: argparse.Namespace: Parsed arguments """ @@ -42,13 +42,13 @@ def parse_args(): Examples: # Basic usage with OpenAI python po_translator.py --folder ./locales --lang fr,es,de - + # Use Anthropic with detailed language names python po_translator.py --folder ./i18n --lang nl,de --detail-lang "Dutch,German" --provider anthropic - + # List available models for a provider python po_translator.py --provider deepseek --list-models - + # Process multiple translations in bulk with a specific model python po_translator.py --folder ./locales --lang ja,ko --bulk --model gpt-4 """, @@ -174,10 +174,10 @@ def show_help_and_exit(): def parse_languages(lang_arg: str) -> List[str]: """ Parse comma-separated language string into a list of language codes. - + Args: lang_arg (str): Comma-separated language codes - + Returns: List[str]: List of language codes """ @@ -187,14 +187,14 @@ def parse_languages(lang_arg: str) -> List[str]: def create_language_mapping(lang_codes: List[str], detail_langs_arg: Optional[str]) -> Dict[str, str]: """ Create a mapping between language codes and their detailed names. - + Args: lang_codes (List[str]): List of language codes detail_langs_arg (Optional[str]): Comma-separated detailed language names - + Returns: Dict[str, str]: Mapping of language codes to detailed names - + Raises: ValueError: If the number of language codes doesn't match the number of detailed names """ @@ -212,10 +212,10 @@ def create_language_mapping(lang_codes: List[str], detail_langs_arg: Optional[st def get_provider_from_args(args) -> Optional[ModelProvider]: """ Get the provider from command line arguments. - + Args: args (argparse.Namespace): Parsed command line arguments - + Returns: Optional[ModelProvider]: The selected provider or None if not specified """ @@ -227,10 +227,10 @@ def get_provider_from_args(args) -> Optional[ModelProvider]: def get_api_keys_from_args(args) -> Dict[str, str]: """ Extract API keys from command line arguments and environment variables. - + Args: args (argparse.Namespace): Parsed command line arguments - + Returns: Dict[str, str]: Dictionary of provider names to API keys """ @@ -244,10 +244,10 @@ def get_api_keys_from_args(args) -> Dict[str, str]: def auto_select_provider(api_keys: Dict[str, str]) -> Optional[ModelProvider]: """ Auto-select a provider based on available API keys. - + Args: api_keys (Dict[str, str]): Dictionary of provider names to API keys - + Returns: Optional[ModelProvider]: The auto-selected provider or None if no keys available """ @@ -262,11 +262,11 @@ def auto_select_provider(api_keys: Dict[str, str]) -> Optional[ModelProvider]: def validate_provider_key(provider: ModelProvider, api_keys: Dict[str, str]) -> bool: """ Validate that the selected provider has an API key. - + Args: provider (ModelProvider): The selected provider api_keys (Dict[str, str]): Dictionary of provider names to API keys - + Returns: bool: True if provider has a key, False otherwise """ diff --git a/python_gpt_po/utils/helpers.py b/python_gpt_po/utils/helpers.py index 1fccf7f..e84ee9f 100644 --- a/python_gpt_po/utils/helpers.py +++ b/python_gpt_po/utils/helpers.py @@ -4,14 +4,24 @@ from pkg_resources import DistributionNotFound, get_distribution +# Import version with fallback to avoid circular imports +try: + from .. import __version__ +except (ImportError, AttributeError): + __version__ = None + def get_version(): """ Get package version. - + Returns: str: The package version or a default if not found """ + # First check if version is available from the top-level import + if __version__ is not None: + return __version__ + # Fall back to package metadata try: return get_distribution("gpt-po-translator").version except DistributionNotFound: diff --git a/setup.py b/setup.py index 0605b67..5dbd50f 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ import os import subprocess +from typing import Optional from setuptools import find_packages, setup @@ -15,22 +16,35 @@ install_requires = [line.strip() for line in f if line.strip() and not line.startswith('#')] -def get_version(): - """Get version from git or environment variable.""" - # Check for Docker environment - if 'PACKAGE_VERSION' in os.environ: - return os.environ.get('PACKAGE_VERSION') +def get_pep440_version() -> Optional[str]: + """ + Get version from environment or git, ensuring it's PEP 440 compliant. - # Check for CI/CD environment variable - if 'GITHUB_REF' in os.environ and os.environ['GITHUB_REF'].startswith('refs/tags/'): - # Extract version from tag (strip 'v' prefix if present) - return os.environ['GITHUB_REF'].split('/')[-1].lstrip('v') + Returns: + Optional[str]: PEP 440 compliant version string or None to defer to setuptools_scm + """ - # Try getting from git + # First check environment variable (highest priority for containers) + if 'PACKAGE_VERSION' in os.environ: + raw_version = os.environ.get('PACKAGE_VERSION') + # Make version PEP 440 compliant + if '-' in raw_version and '+' not in raw_version: + # Convert something like "1.2.3-test" to "1.2.3+test" for PEP 440 + version = raw_version.replace('-', '+', 1) + else: + version = raw_version + print(f"Using version from environment: {version}") + return version + + # Then try getting from git try: # Get version from git describe, but normalize it to be PEP 440 compliant - version = subprocess.check_output(['git', 'describe', '--tags', '--always']).decode('utf-8').strip() - + version = subprocess.check_output( + ['git', 'describe', '--tags', '--always'], + stderr=subprocess.STDOUT, + text=True + ).strip() + # Handle version format from git describe if '-' in version: # Format like v0.3.5-5-gd9775d7, convert to 0.3.5.dev5+gd9775d7 @@ -39,11 +53,17 @@ def get_version(): elif version.startswith('v'): # Just a tagged version like v0.3.5 version = version[1:] - + + print(f"Using git version: {version}") return version except (subprocess.SubprocessError, FileNotFoundError): - # Fallback version - return "0.1.0" + # Defer to setuptools_scm + print("Deferring to setuptools_scm for version") + return None + + +# Get version using our custom function +package_version = get_pep440_version() def install_man_pages(): @@ -60,8 +80,8 @@ def install_man_pages(): setup( - name='gpt-po-translator', - version=get_version(), + name='gpt_po_translator', + version=package_version, # Will be None if PACKAGE_VERSION is not set, triggering setuptools_scm author='Bram Mittendorff', author_email='bram@pescheck.io', description='A CLI tool for translating .po files using GPT models.', @@ -69,7 +89,7 @@ def install_man_pages(): long_description_content_type='text/markdown', url='https://github.com/pescheckit/python-gpt-po', license='MIT', - packages=find_packages(), + packages=find_packages(exclude=["*.tests", "*.tests.*", "*.__pycache__", "*.__pycache__.*"]), include_package_data=True, install_requires=install_requires, entry_points={