Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
![PyPI](https://img.shields.io/pypi/v/gpt-po-translator?label=gpt-po-translator)
![Downloads](https://pepy.tech/badge/gpt-po-translator)

A robust tool for translating gettext (.po) files using AI models from multiple providers (OpenAI, Anthropic / Claude, and DeepSeek). It supports both bulk and individual translations, handles fuzzy entries, and can infer target languages based on folder structures. Available as a Python package and Docker container with support for Python 3.8-3.12.
A robust tool for translating gettext (.po) files using AI models from multiple providers (OpenAI, Azure OpenAI, Anthropic / Claude, and DeepSeek). It supports both bulk and individual translations, handles fuzzy entries, and can infer target languages based on folder structures. Available as a Python package and Docker container with support for Python 3.8-3.12.

## What is GPT-PO Translator?

This tool helps you translate gettext (.po) files using AI models. It's perfect for developers who need to localize their applications quickly and accurately.

### Key Features

- **Multiple AI providers** - OpenAI, Anthropic/Claude, and DeepSeek
- **Multiple AI providers** - OpenAI, Azure OpenAI, Anthropic/Claude, and DeepSeek
- **Flexible translation modes** - Bulk or entry-by-entry processing
- **Smart language handling** - Auto-detects target languages from folder structure
- **Production-ready** - Includes retry logic, validation, and detailed logging
Expand All @@ -29,6 +29,8 @@ pip install gpt-po-translator

### Basic Usage

To translate the `.po` files for the German and French languages found in the `locales` folder using OpenAI:

```bash
# Set up your API key
export OPENAI_API_KEY='your_api_key_here'
Expand Down Expand Up @@ -77,6 +79,7 @@ export OPENAI_API_KEY='your_api_key_here'
# Or for other providers:
export ANTHROPIC_API_KEY='your_api_key_here'
export DEEPSEEK_API_KEY='your_api_key_here'
export AZURE_OPENAI_API_KEY='your_api_key_here'
```

### Option 2: Command Line
Expand Down Expand Up @@ -110,8 +113,14 @@ gpt-po-translator --provider anthropic --folder ./locales --lang de
# Use DeepSeek models
gpt-po-translator --provider deepseek --folder ./locales --lang de

# List available models
# List available models for openai
gpt-po-translator --provider openai --list-models

# List available models for azure openai
gpt-po-translator --provider azure_openai \
 --azure-openai-endpoint https://<resource-name>.cognitiveservices.azure.com/ \
 --azure-openai-api-version <api_version> \
 --list-models
```

## Command Reference
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires = ["setuptools>=61.0", "wheel", "setuptools_scm>=6.2"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
# Use find with namespaces=True to handle nested packages
# Use find with namespaces=True to handle nested packages
packages = { find = { exclude = ["*.__pycache__", "*.__pycache__.*"], namespaces = true } }

[tool.setuptools_scm]
Expand Down
8 changes: 5 additions & 3 deletions python_gpt_po/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def initialize_provider(args: Namespace) -> tuple[ProviderClients, ModelProvider

# Initialize provider clients
provider_clients = ProviderClients()
provider_clients.initialize_clients(api_keys)
provider_clients.initialize_clients(args, api_keys)

# Get provider from arguments or auto-select
provider = get_provider_from_args(args)
Expand Down Expand Up @@ -117,7 +117,8 @@ def get_appropriate_model(

# Fall back to default model if no models could be retrieved
default_model = model_manager.get_default_model(provider)
logging.warning("No available models found from API; defaulting to %s", default_model)
logging.warning("No available models found from API; defaulting to %s",
default_model)
return default_model


Expand Down Expand Up @@ -145,7 +146,8 @@ def process_translations(config: TranslationConfig, folder: str,
sys.exit(1)

# Start processing files
logging.info("Starting translation with %s using model %s", config.provider.value, config.model)
logging.info("Starting translation with %s using model %s in folder %s",
config.provider.value, config.model, folder)
translation_service.scan_and_process_po_files(folder, languages, detail_languages)
logging.info("Translation completed successfully")

Expand Down
4 changes: 3 additions & 1 deletion python_gpt_po/models/enums.py
Original file line number Diff line number Diff line change
class ModelProvider(Enum):
    """Identifiers for the supported AI translation providers."""
    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    DEEPSEEK = "deepseek"
    AZURE_OPENAI = "azure_openai"


# All provider identifier strings accepted on the command line.
# NOTE: the pasted diff left both the pre- and post-merge DEEPSEEK lines in
# place (one without a trailing comma), which is not valid Python; this is
# the reconstructed merged state.
ModelProviderList = [
    ModelProvider.OPENAI.value,
    ModelProvider.ANTHROPIC.value,
    ModelProvider.DEEPSEEK.value,
    ModelProvider.AZURE_OPENAI.value,
]
22 changes: 20 additions & 2 deletions python_gpt_po/models/provider_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
Client classes for different AI providers.
"""

import os
from argparse import Namespace
from typing import Dict

from anthropic import Anthropic
from openai import OpenAI
from openai import AzureOpenAI, OpenAI

from .enums import ModelProvider

Expand All @@ -15,11 +17,12 @@ class ProviderClients:

def __init__(self):
self.openai_client = None
self.azure_openai_client = None
self.anthropic_client = None
self.deepseek_api_key = None
self.deepseek_base_url = "https://api.deepseek.com/v1"

def initialize_clients(self, api_keys: Dict[str, str]):
def initialize_clients(self, args: Namespace, api_keys: Dict[str, str]):
"""Initialize API clients for all providers with available keys.

Args:
Expand All @@ -28,6 +31,21 @@ def initialize_clients(self, api_keys: Dict[str, str]):
if api_keys.get(ModelProvider.OPENAI.value):
self.openai_client = OpenAI(api_key=api_keys[ModelProvider.OPENAI.value])

if api_keys.get(ModelProvider.AZURE_OPENAI.value):
endpoint = args.azure_openai_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
if not endpoint:
raise ValueError("Missing Azure OpenAI endpoint.")

api_version = args.azure_openai_api_version or os.getenv("AZURE_OPENAI_API_VERSION")
if not api_version:
raise ValueError("Missing Azure OpenAI API version.")

self.azure_openai_client = AzureOpenAI(
azure_endpoint=endpoint,
api_key=api_keys[ModelProvider.AZURE_OPENAI.value],
api_version=api_version
)

if api_keys.get(ModelProvider.ANTHROPIC.value):
self.anthropic_client = Anthropic(api_key=api_keys[ModelProvider.ANTHROPIC.value])

Expand Down
18 changes: 16 additions & 2 deletions python_gpt_po/services/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,24 @@ def get_available_models(provider_clients: ProviderClients, provider: ModelProvi
else:
logging.error("DeepSeek API key not set")

elif provider == ModelProvider.AZURE_OPENAI:
return ModelManager._get_azure_openai_models(provider_clients)

except Exception as e:
logging.error("Error fetching models from %s: %s", provider.value, str(e))

return models

@staticmethod
def _get_azure_openai_models(provider_clients: ProviderClients) -> List[str]:
    """Return the ids of the models visible to the Azure OpenAI client.

    Logs an error and returns an empty list when no Azure OpenAI client
    has been initialized on *provider_clients*.
    """
    client = provider_clients.azure_openai_client
    if not client:
        logging.error("Azure OpenAI client not initialized")
        return []
    listing = client.models.list()
    return [entry.id for entry in listing.data]

@staticmethod
def validate_model(provider_clients: ProviderClients, provider: ModelProvider, model: str) -> bool:
"""
Expand Down Expand Up @@ -109,9 +122,10 @@ def get_default_model(provider: ModelProvider) -> str:
default_models = {
ModelProvider.OPENAI: "gpt-4o-mini",
ModelProvider.ANTHROPIC: "claude-3-5-haiku-latest",
ModelProvider.DEEPSEEK: "deepseek-chat"
ModelProvider.DEEPSEEK: "deepseek-chat",
ModelProvider.AZURE_OPENAI: "gpt-35-turbo",
}
return default_models.get(provider)
return default_models.get(provider, "")

@staticmethod
def verify_model_capabilities(
Expand Down
15 changes: 15 additions & 0 deletions python_gpt_po/services/translation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,19 @@ def _get_deepseek_response(self, content: str) -> str:
response.raise_for_status()
return response.json()["choices"][0]["message"]["content"].strip()

def _get_azure_openai_response(self, content: str) -> str:
"""Get response from OpenAI API."""
if not self.config.provider_clients.azure_openai_client:
raise ValueError("OpenAI client not initialized")

message = {"role": "user", "content": content}
completion = self.config.provider_clients.azure_openai_client.chat.completions.create(
model=self.config.model,
max_tokens=4000,
messages=[message]
)
return completion.choices[0].message.content.strip()

def validate_provider_connection(self) -> bool:
"""Validates the connection to the selected provider by making a test API call."""
provider = self.config.provider
Expand Down Expand Up @@ -231,6 +244,8 @@ def _get_provider_response(self, content: str) -> str:
return self._get_anthropic_response(content)
if provider == ModelProvider.DEEPSEEK:
return self._get_deepseek_response(content)
if provider == ModelProvider.AZURE_OPENAI:
return self._get_azure_openai_response(content)
return ""

def _process_bulk_response(self, response_text: str, original_texts: List[str]) -> List[str]:
Expand Down
Loading