diff --git a/README.md b/README.md index 53618c5..f4a7ec8 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ![PyPI](https://img.shields.io/pypi/v/gpt-po-translator?label=gpt-po-translator) ![Downloads](https://pepy.tech/badge/gpt-po-translator) -A robust tool for translating gettext (.po) files using AI models from multiple providers (OpenAI, Anthropic / Claude, and DeepSeek). It supports both bulk and individual translations, handles fuzzy entries, and can infer target languages based on folder structures. Available as a Python package and Docker container with support for Python 3.8-3.12. +A robust tool for translating gettext (.po) files using AI models from multiple providers (OpenAI, Azure OpenAI, Anthropic / Claude, and DeepSeek). It supports both bulk and individual translations, handles fuzzy entries, and can infer target languages based on folder structures. Available as a Python package and Docker container with support for Python 3.8-3.12. ## What is GPT-PO Translator? @@ -12,7 +12,7 @@ This tool helps you translate gettext (.po) files using AI models. 
It's perfect ### Key Features -- **Multiple AI providers** - OpenAI, Anthropic/Claude, and DeepSeek +- **Multiple AI providers** - OpenAI, Azure OpenAI, Anthropic/Claude, and DeepSeek - **Flexible translation modes** - Bulk or entry-by-entry processing - **Smart language handling** - Auto-detects target languages from folder structure - **Production-ready** - Includes retry logic, validation, and detailed logging @@ -29,6 +29,8 @@ pip install gpt-po-translator ### Basic Usage +To translate the `po` files for the German and French languages found in the `locales` folder, using OpenAI: + ```bash # Set up your API key export OPENAI_API_KEY='your_api_key_here' @@ -77,6 +79,7 @@ export OPENAI_API_KEY='your_api_key_here' # Or for other providers: export ANTHROPIC_API_KEY='your_api_key_here' export DEEPSEEK_API_KEY='your_api_key_here' +export AZURE_OPENAI_API_KEY='your_api_key_here' ``` ### Option 2: Command Line @@ -110,8 +113,14 @@ gpt-po-translator --provider anthropic --folder ./locales --lang de # Use DeepSeek models gpt-po-translator --provider deepseek --folder ./locales --lang de -# List available models +# List available models for OpenAI gpt-po-translator --provider openai --list-models + +# List available models for Azure OpenAI +gpt-po-translator --provider azure_openai \ + --azure-openai-endpoint https://<your-resource>.cognitiveservices.azure.com/ \ + --azure-openai-api-version <api-version> \ + --list-models ``` ## Command Reference diff --git a/pyproject.toml b/pyproject.toml index 821bc22..7bda1c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=6.2"] build-backend = "setuptools.build_meta" [tool.setuptools] -# Use find with namespaces=True to handle nested packages +# Use find with namespaces=True to handle nested packages packages = { find = { exclude = ["*.__pycache__", "*.__pycache__.*"], namespaces = true } } [tool.setuptools_scm] diff --git a/python_gpt_po/main.py b/python_gpt_po/main.py index
aaae0db..f94b118 100644 --- a/python_gpt_po/main.py +++ b/python_gpt_po/main.py @@ -49,7 +49,7 @@ def initialize_provider(args: Namespace) -> tuple[ProviderClients, ModelProvider # Initialize provider clients provider_clients = ProviderClients() - provider_clients.initialize_clients(api_keys) + provider_clients.initialize_clients(args, api_keys) # Get provider from arguments or auto-select provider = get_provider_from_args(args) @@ -117,7 +117,8 @@ def get_appropriate_model( # Fall back to default model if no models could be retrieved default_model = model_manager.get_default_model(provider) - logging.warning("No available models found from API; defaulting to %s", default_model) + logging.warning("No available models found from API; defaulting to %s", + default_model) return default_model @@ -145,7 +146,8 @@ def process_translations(config: TranslationConfig, folder: str, sys.exit(1) # Start processing files - logging.info("Starting translation with %s using model %s", config.provider.value, config.model) + logging.info("Starting translation with %s using model %s in folder %s", + config.provider.value, config.model, folder) translation_service.scan_and_process_po_files(folder, languages, detail_languages) logging.info("Translation completed successfully") diff --git a/python_gpt_po/models/enums.py b/python_gpt_po/models/enums.py index 103dde3..5df10f8 100644 --- a/python_gpt_po/models/enums.py +++ b/python_gpt_po/models/enums.py @@ -10,10 +10,12 @@ class ModelProvider(Enum): OPENAI = "openai" ANTHROPIC = "anthropic" DEEPSEEK = "deepseek" + AZURE_OPENAI = "azure_openai" ModelProviderList = [ ModelProvider.OPENAI.value, ModelProvider.ANTHROPIC.value, - ModelProvider.DEEPSEEK.value + ModelProvider.DEEPSEEK.value, + ModelProvider.AZURE_OPENAI.value ] diff --git a/python_gpt_po/models/provider_clients.py b/python_gpt_po/models/provider_clients.py index f451535..9e56088 100644 --- a/python_gpt_po/models/provider_clients.py +++ b/python_gpt_po/models/provider_clients.py 
@@ -2,10 +2,12 @@ Client classes for different AI providers. """ +import os +from argparse import Namespace from typing import Dict from anthropic import Anthropic -from openai import OpenAI +from openai import AzureOpenAI, OpenAI from .enums import ModelProvider @@ -15,11 +17,12 @@ class ProviderClients: def __init__(self): self.openai_client = None + self.azure_openai_client = None self.anthropic_client = None self.deepseek_api_key = None self.deepseek_base_url = "https://api.deepseek.com/v1" - def initialize_clients(self, api_keys: Dict[str, str]): + def initialize_clients(self, args: Namespace, api_keys: Dict[str, str]): """Initialize API clients for all providers with available keys. Args: @@ -28,6 +31,21 @@ def initialize_clients(self, api_keys: Dict[str, str]): if api_keys.get(ModelProvider.OPENAI.value): self.openai_client = OpenAI(api_key=api_keys[ModelProvider.OPENAI.value]) + if api_keys.get(ModelProvider.AZURE_OPENAI.value): + endpoint = args.azure_openai_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT") + if not endpoint: + raise ValueError("Missing Azure OpenAI endpoint.") + + api_version = args.azure_openai_api_version or os.getenv("AZURE_OPENAI_API_VERSION") + if not api_version: + raise ValueError("Missing Azure OpenAI API version.") + + self.azure_openai_client = AzureOpenAI( + azure_endpoint=endpoint, + api_key=api_keys[ModelProvider.AZURE_OPENAI.value], + api_version=api_version + ) + if api_keys.get(ModelProvider.ANTHROPIC.value): self.anthropic_client = Anthropic(api_key=api_keys[ModelProvider.ANTHROPIC.value]) diff --git a/python_gpt_po/services/model_manager.py b/python_gpt_po/services/model_manager.py index 3dd8151..d220958 100644 --- a/python_gpt_po/services/model_manager.py +++ b/python_gpt_po/services/model_manager.py @@ -71,11 +71,24 @@ def get_available_models(provider_clients: ProviderClients, provider: ModelProvi else: logging.error("DeepSeek API key not set") + elif provider == ModelProvider.AZURE_OPENAI: + return 
ModelManager._get_azure_openai_models(provider_clients) + except Exception as e: logging.error("Error fetching models from %s: %s", provider.value, str(e)) return models + @staticmethod + def _get_azure_openai_models(provider_clients: ProviderClients) -> List[str]: + """Retrieve models from Azure OpenAI.""" + if provider_clients.azure_openai_client: + response = provider_clients.azure_openai_client.models.list() + return [model.id for model in response.data] + + logging.error("Azure OpenAI client not initialized") + return [] + @staticmethod def validate_model(provider_clients: ProviderClients, provider: ModelProvider, model: str) -> bool: """ @@ -109,9 +122,10 @@ def get_default_model(provider: ModelProvider) -> str: default_models = { ModelProvider.OPENAI: "gpt-4o-mini", ModelProvider.ANTHROPIC: "claude-3-5-haiku-latest", - ModelProvider.DEEPSEEK: "deepseek-chat" + ModelProvider.DEEPSEEK: "deepseek-chat", + ModelProvider.AZURE_OPENAI: "gpt-35-turbo", } - return default_models.get(provider) + return default_models.get(provider, "") @staticmethod def verify_model_capabilities( diff --git a/python_gpt_po/services/translation_service.py b/python_gpt_po/services/translation_service.py index 9c265f1..511a3f1 100644 --- a/python_gpt_po/services/translation_service.py +++ b/python_gpt_po/services/translation_service.py @@ -83,6 +83,19 @@ def _get_deepseek_response(self, content: str) -> str: response.raise_for_status() return response.json()["choices"][0]["message"]["content"].strip() + def _get_azure_openai_response(self, content: str) -> str: + """Get response from Azure OpenAI API.""" + if not self.config.provider_clients.azure_openai_client: + raise ValueError("Azure OpenAI client not initialized") + + message = {"role": "user", "content": content} + completion = self.config.provider_clients.azure_openai_client.chat.completions.create( + model=self.config.model, + max_tokens=4000, + messages=[message] + ) + return completion.choices[0].message.content.strip() + def
validate_provider_connection(self) -> bool: """Validates the connection to the selected provider by making a test API call.""" provider = self.config.provider @@ -231,6 +244,8 @@ def _get_provider_response(self, content: str) -> str: return self._get_anthropic_response(content) if provider == ModelProvider.DEEPSEEK: return self._get_deepseek_response(content) + if provider == ModelProvider.AZURE_OPENAI: + return self._get_azure_openai_response(content) return "" def _process_bulk_response(self, response_text: str, original_texts: List[str]) -> List[str]: diff --git a/python_gpt_po/tests/test_multi_provider.py b/python_gpt_po/tests/test_multi_provider.py index ec18adb..e012c76 100644 --- a/python_gpt_po/tests/test_multi_provider.py +++ b/python_gpt_po/tests/test_multi_provider.py @@ -3,6 +3,7 @@ """ import logging +import os from unittest.mock import MagicMock, patch import pytest @@ -57,6 +58,8 @@ "object": "list" } +AZURE_OPENAI_MODELS_RESPONSE = OPENAI_MODELS_RESPONSE + ANTHROPIC_MODELS_RESPONSE = { "data": [ {"type": "model", "id": "claude-3-7-sonnet-20250219", "display_name": "Claude 3.7 Sonnet", "created_at": "2025-02-19T00:00:00Z"}, @@ -87,6 +90,8 @@ ] } +AZURE_OPENAI_TRANSLATION_RESPONSE = OPENAI_TRANSLATION_RESPONSE + ANTHROPIC_TRANSLATION_RESPONSE = { "content": [ { @@ -107,16 +112,16 @@ @pytest.fixture -def temp_po_file(tmp_path): +def temp_po_file(tmp_path: str) -> str: """Create a temporary PO file for testing.""" - po_file_path = tmp_path / "test.po" + po_file_path = os.path.join(tmp_path, "test.po") with open(po_file_path, "w", encoding="utf-8") as f: f.write(SAMPLE_PO_CONTENT) return str(po_file_path) @pytest.fixture -def mock_provider_clients(): +def mock_provider_clients() -> ProviderClients: """Mock provider clients for testing.""" clients = ProviderClients() clients.openai_client = MagicMock() @@ -124,11 +129,13 @@ def mock_provider_clients(): clients.anthropic_client.api_key = "sk-ant-mock-key" clients.deepseek_api_key = "sk-deepseek-mock-key" 
clients.deepseek_base_url = "https://api.deepseek.com/v1" + clients.azure_openai_client = MagicMock() + clients.azure_openai_client.api_key = "sk-aoi-mock-key" return clients @pytest.fixture -def translation_config_openai(mock_provider_clients): +def translation_config_openai(mock_provider_clients: ProviderClients) -> TranslationConfig: """Create an OpenAI translation config for testing.""" return TranslationConfig( provider_clients=mock_provider_clients, @@ -141,7 +148,20 @@ @pytest.fixture -def translation_config_anthropic(mock_provider_clients): +def translation_config_azure_openai(mock_provider_clients: ProviderClients) -> TranslationConfig: + """Create an Azure OpenAI translation config for testing.""" + return TranslationConfig( + provider_clients=mock_provider_clients, + provider=ModelProvider.AZURE_OPENAI, + model="gpt-3.5-turbo", + bulk_mode=True, + fuzzy=False, + folder_language=False + ) + + +@pytest.fixture +def translation_config_anthropic(mock_provider_clients: ProviderClients) -> TranslationConfig: """Create an Anthropic translation config for testing.""" return TranslationConfig( provider_clients=mock_provider_clients, @@ -154,7 +174,7 @@ @pytest.fixture -def translation_config_deepseek(mock_provider_clients): +def translation_config_deepseek(mock_provider_clients: ProviderClients) -> TranslationConfig: """Create a DeepSeek translation config for testing.""" return TranslationConfig( provider_clients=mock_provider_clients, @@ -167,25 +187,31 @@ @pytest.fixture -def translation_service_openai(translation_config_openai): +def translation_service_openai(translation_config_openai: TranslationConfig) -> TranslationService: """Create an OpenAI translation service for testing.""" return TranslationService(config=translation_config_openai) @pytest.fixture -def
translation_service_anthropic(translation_config_anthropic): +def translation_service_azure_openai(translation_config_azure_openai: TranslationConfig) -> TranslationService: + """Create an Azure OpenAI translation service for testing.""" + return TranslationService(config=translation_config_azure_openai) + + +@pytest.fixture +def translation_service_anthropic(translation_config_anthropic: TranslationConfig) -> TranslationService: """Create an Anthropic translation service for testing.""" return TranslationService(config=translation_config_anthropic) @pytest.fixture -def translation_service_deepseek(translation_config_deepseek): +def translation_service_deepseek(translation_config_deepseek: TranslationConfig) -> TranslationService: """Create a DeepSeek translation service for testing.""" return TranslationService(config=translation_config_deepseek) @patch('requests.get') -def test_get_openai_models(mock_get, mock_provider_clients): +def test_get_openai_models(mock_get, mock_provider_clients: ProviderClients): """Test getting OpenAI models.""" # Setup mock response mock_response = MagicMock() @@ -206,8 +232,30 @@ assert "gpt-4" in models +@patch('requests.get') +def test_get_azure_openai_models(mock_get, mock_provider_clients: ProviderClients): + """Test getting Azure OpenAI models.""" + # Setup mock response + mock_response = MagicMock() + mock_response.json.return_value = AZURE_OPENAI_MODELS_RESPONSE + mock_response.raise_for_status = MagicMock() + mock_get.return_value = mock_response + + # Mock the Azure OpenAI client's models.list method + models_list_mock = MagicMock() + models_list_mock.data = [MagicMock(id="gpt-4"), MagicMock(id="gpt-3.5-turbo")] + mock_provider_clients.azure_openai_client.models.list.return_value = models_list_mock + + # Call the function + model_manager = ModelManager() + models = model_manager.get_available_models(mock_provider_clients, ModelProvider.AZURE_OPENAI) + + # Assert models are
returned correctly + assert "gpt-3.5-turbo" in models + + @responses.activate -def test_get_anthropic_models(mock_provider_clients): +def test_get_anthropic_models(mock_provider_clients: ProviderClients): """Test getting Anthropic models.""" # Setup mock response responses.add( @@ -227,7 +275,7 @@ @responses.activate -def test_get_deepseek_models(mock_provider_clients): +def test_get_deepseek_models(mock_provider_clients: ProviderClients): """Test getting DeepSeek models.""" # Setup mock response responses.add( @@ -247,7 +295,7 @@ @patch('python_gpt_po.services.translation_service.requests.post') -def test_translate_bulk_openai(mock_post, translation_service_openai): +def test_translate_bulk_openai(mock_post, translation_service_openai: TranslationService): """Test bulk translation with OpenAI.""" # Setup mock response mock_response = MagicMock() @@ -267,7 +315,27 @@ @patch('python_gpt_po.services.translation_service.requests.post') -def test_translate_bulk_anthropic(mock_post, translation_service_anthropic): +def test_translate_bulk_azure_openai(mock_post, translation_service_azure_openai: TranslationService): + """Test bulk translation with Azure OpenAI.""" + # Setup mock response + mock_response = MagicMock() + mock_response.json.return_value = AZURE_OPENAI_TRANSLATION_RESPONSE + mock_post.return_value = mock_response + + # Call function + translation_service_azure_openai.config.provider_clients.azure_openai_client.chat.completions.create.return_value = MagicMock( + choices=[MagicMock(message=MagicMock(content='["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"]'))] + ) + + texts = ["Hello", "World", "Welcome to our application", "Goodbye"] + translations = translation_service_azure_openai.translate_bulk(texts, "fr", "test.po") + + # Assert translations are correct + assert
translations == ["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"] + + +@patch('python_gpt_po.services.translation_service.requests.post') +def test_translate_bulk_anthropic(mock_post, translation_service_anthropic: TranslationService): """Test bulk translation with Anthropic.""" # Setup mock client response translation_service_anthropic.config.provider_clients.anthropic_client.messages.create.return_value = MagicMock( @@ -282,7 +350,7 @@ def test_translate_bulk_anthropic(mock_post, translation_service_anthropic): @responses.activate -def test_translate_bulk_deepseek(translation_service_deepseek): +def test_translate_bulk_deepseek(translation_service_deepseek: TranslationService): """Test bulk translation with DeepSeek.""" # Setup mock response responses.add( @@ -307,7 +375,7 @@ def test_translate_bulk_deepseek(translation_service_deepseek): assert translations == ["Bonjour", "Monde", "Bienvenue dans notre application", "Au revoir"] -def test_clean_json_response(translation_service_deepseek): +def test_clean_json_response(translation_service_deepseek: TranslationService): """Test cleaning JSON responses from different formats.""" # Test markdown code block format markdown_json = "```json\n[\"Bonjour\", \"Monde\"]\n```" @@ -326,9 +394,12 @@ def test_clean_json_response(translation_service_deepseek): @patch('polib.pofile') -def test_process_po_file_all_providers(mock_pofile, translation_service_openai, - translation_service_anthropic, - translation_service_deepseek, temp_po_file): +def test_process_po_file_all_providers(mock_pofile, + translation_service_openai: TranslationService, + translation_service_anthropic: TranslationService, + translation_service_deepseek: TranslationService, + translation_service_azure_openai: TranslationService, + temp_po_file: str): """Test processing a PO file with all providers.""" # Create a mock PO file mock_po = MagicMock() @@ -346,7 +417,10 @@ def test_process_po_file_all_providers(mock_pofile, 
translation_service_openai, mock_pofile.return_value = mock_po # Setup translation method mocks for each service - for i, service in enumerate([translation_service_openai, translation_service_anthropic, translation_service_deepseek]): + for i, service in enumerate([translation_service_openai, + translation_service_anthropic, + translation_service_deepseek, + translation_service_azure_openai]): # Create a fresh mock for each service mock_po_new = MagicMock() mock_po_new.__iter__.return_value = mock_entries @@ -365,8 +439,9 @@ def test_process_po_file_all_providers(mock_pofile, translation_service_openai, service.get_translations.assert_called_once() mock_po_new.save.assert_called_once() + @patch('python_gpt_po.services.po_file_handler.POFileHandler.disable_fuzzy_translations') -def test_fuzzy_flag_handling(mock_disable_fuzzy, translation_service_openai, temp_po_file): +def test_fuzzy_flag_handling(mock_disable_fuzzy, translation_service_openai: TranslationService, temp_po_file): """Test handling of fuzzy translations.""" # Enable fuzzy flag translation_service_openai.config.fuzzy = True @@ -388,7 +463,10 @@ def test_fuzzy_flag_handling(mock_disable_fuzzy, translation_service_openai, tem def test_validation_model_connection_all_providers( - translation_service_openai, translation_service_anthropic, translation_service_deepseek + translation_service_openai: TranslationService, + translation_service_anthropic: TranslationService, + translation_service_deepseek: TranslationService, + translation_service_azure_openai: TranslationService ): """Test validating connection to all providers.""" # Configure OpenAI mock @@ -398,6 +476,10 @@ def test_validation_model_connection_all_providers( translation_service_anthropic.config.provider_clients.anthropic_client.messages.create.return_value = MagicMock() # Configure DeepSeek mock + + # Configure Azure OpenAI mock + translation_service_azure_openai.config.provider_clients.azure_openai_client.chat.completions.create.return_value = 
MagicMock() + with patch('requests.post') as mock_post: mock_response = MagicMock() mock_response.raise_for_status = MagicMock() @@ -407,11 +489,12 @@ def test_validation_model_connection_all_providers( assert translation_service_openai.validate_provider_connection() is True assert translation_service_anthropic.validate_provider_connection() is True assert translation_service_deepseek.validate_provider_connection() is True + assert translation_service_azure_openai.validate_provider_connection() is True @patch('os.walk') @patch('polib.pofile') -def test_scan_and_process_po_files(mock_pofile, mock_walk, translation_service_openai): +def test_scan_and_process_po_files(mock_pofile, mock_walk, translation_service_openai: TranslationService): """Test scanning and processing PO files.""" # Setup mock directory structure mock_walk.return_value = [ diff --git a/python_gpt_po/utils/cli.py b/python_gpt_po/utils/cli.py index 16cff5b..0a1258a 100644 --- a/python_gpt_po/utils/cli.py +++ b/python_gpt_po/utils/cli.py @@ -129,12 +129,29 @@ def parse_args() -> Namespace: metavar="KEY", help="DeepSeek API key (can also use DEEPSEEK_API_KEY env var)" ) + api_group.add_argument( + "--azure-openai-key", + metavar="KEY", + help="Azure OpenAI API key (can also use AZURE_OPENAI_API_KEY env var)" + ) api_group.add_argument( "--api_key", metavar="KEY", help="Fallback API key for OpenAI (deprecated, use --openai-key instead)" ) + # Azure OpenAI options + advanced_group.add_argument( + "--azure-openai-endpoint", + metavar="ENDPOINT", + help="Azure OpenAI endpoint URL (can also use AZURE_OPENAI_ENDPOINT env var)" + ) + advanced_group.add_argument( + "--azure-openai-api-version", + metavar="VERSION", + help="Azure OpenAI API version (can also use AZURE_OPENAI_API_VERSION env var)" + ) + # Advanced options advanced_group.add_argument( "--bulk", @@ -250,7 +267,8 @@ def get_api_keys_from_args(args: Namespace) -> Dict[str, str]: return { ModelProvider.OPENAI.value: args.openai_key or args.api_key or 
os.getenv("OPENAI_API_KEY", ""), ModelProvider.ANTHROPIC.value: args.anthropic_key or os.getenv("ANTHROPIC_API_KEY", ""), - ModelProvider.DEEPSEEK.value: args.deepseek_key or os.getenv("DEEPSEEK_API_KEY", "") + ModelProvider.DEEPSEEK.value: args.deepseek_key or os.getenv("DEEPSEEK_API_KEY", ""), + ModelProvider.AZURE_OPENAI.value: args.azure_openai_key or os.getenv("AZURE_OPENAI_API_KEY", ""), }