Skip to content

Commit e816622

Browse files
author
bram
committed
Added context with msgctxt
1 parent a9a6d6c commit e816622

File tree

4 files changed

+315
-29
lines changed

4 files changed

+315
-29
lines changed

python_gpt_po/services/translation_service.py

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class TranslationRequest:
6363
target_language: str
6464
po_file_path: str
6565
detail_language: Optional[str] = None
66+
contexts: Optional[List[Optional[str]]] = None # msgctxt for each entry
6667

6768

6869
@dataclass
@@ -128,11 +129,11 @@ def validate_provider_connection(self) -> bool:
128129

129130
def _translate_chunk(self, chunk_data):
130131
"""Translate a single chunk of texts."""
131-
chunk, target_language, detail_language, chunk_num, total_chunks = chunk_data
132+
chunk, target_language, detail_language, chunk_num, total_chunks, context = chunk_data
132133
logging.info("Batch %d/%d: Translating %d entries...", chunk_num, total_chunks, len(chunk))
133134
try:
134135
translations = self.perform_translation(
135-
chunk, target_language, is_bulk=True, detail_language=detail_language
136+
chunk, target_language, is_bulk=True, detail_language=detail_language, context=context
136137
)
137138
logging.info("Batch %d/%d: Successfully translated %d entries",
138139
chunk_num, total_chunks, len(translations))
@@ -145,7 +146,7 @@ def _translate_chunk(self, chunk_data):
145146
try:
146147
logging.info(" Translating entry %d/%d...", j, len(chunk))
147148
translation = self.perform_translation(
148-
text, target_language, is_bulk=False, detail_language=detail_language
149+
text, target_language, is_bulk=False, detail_language=detail_language, context=context
149150
)
150151
results.append(translation)
151152
except Exception as inner_e:
@@ -158,15 +159,36 @@ def translate_bulk(
158159
texts: List[str],
159160
target_language: str,
160161
po_file_path: str,
161-
detail_language: Optional[str] = None) -> List[str]:
162-
"""Translates a list of texts in bulk, processing in smaller chunks."""
162+
detail_language: Optional[str] = None,
163+
contexts: Optional[List[Optional[str]]] = None) -> List[str]:
164+
"""Translates a list of texts in bulk, processing in smaller chunks.
165+
166+
Args:
167+
texts: List of texts to translate
168+
target_language: Target language code
169+
po_file_path: Path to PO file
170+
detail_language: Detailed language name (optional)
171+
contexts: List of msgctxt values for each text (optional)
172+
"""
163173
translated_texts = []
164174
total_chunks = (len(texts) - 1) // self.batch_size + 1
165175

166176
for i in range(0, len(texts), self.batch_size):
167177
chunk_num = i // self.batch_size + 1
178+
chunk_texts = texts[i:i + self.batch_size]
179+
180+
# Get most common context in this chunk (if contexts provided)
181+
chunk_context = None
182+
if contexts:
183+
chunk_contexts = contexts[i:i + self.batch_size]
184+
# Use most common non-None context, or None if all are None
185+
non_none_contexts = [c for c in chunk_contexts if c]
186+
if non_none_contexts:
187+
from collections import Counter
188+
chunk_context = Counter(non_none_contexts).most_common(1)[0][0]
189+
168190
chunk_data = (
169-
texts[i:i + self.batch_size], target_language, detail_language, chunk_num, total_chunks
191+
chunk_texts, target_language, detail_language, chunk_num, total_chunks, chunk_context
170192
)
171193
translations = self._translate_chunk(chunk_data)
172194
translated_texts.extend(translations)
@@ -183,11 +205,19 @@ def translate_bulk(
183205

184206
return translated_texts
185207

186-
def translate_single(self, text: str, target_language: str, detail_language: Optional[str] = None) -> str:
187-
"""Translates a single text."""
208+
def translate_single(self, text: str, target_language: str, detail_language: Optional[str] = None,
209+
context: Optional[str] = None) -> str:
210+
"""Translates a single text.
211+
212+
Args:
213+
text: Text to translate
214+
target_language: Target language code
215+
detail_language: Detailed language name (optional)
216+
context: Message context from msgctxt (optional, e.g., "button", "menu item")
217+
"""
188218
try:
189219
translation = self.perform_translation(
190-
text, target_language, is_bulk=False, detail_language=detail_language
220+
text, target_language, is_bulk=False, detail_language=detail_language, context=context
191221
)
192222
if not translation.strip():
193223
display_text = text[:50] if len(text) > 50 else text
@@ -223,25 +253,44 @@ def perform_translation_without_validation(
223253
), target_language)
224254

225255
@staticmethod
226-
def get_translation_prompt(target_language: str, is_bulk: bool, detail_language: Optional[str] = None) -> str:
227-
"""Returns the appropriate translation prompt based on the translation mode."""
256+
def get_translation_prompt(target_language: str, is_bulk: bool, detail_language: Optional[str] = None,
257+
context: Optional[str] = None) -> str:
258+
"""Returns the appropriate translation prompt based on the translation mode.
259+
260+
Args:
261+
target_language: Target language code
262+
is_bulk: Whether translating in bulk mode
263+
detail_language: Detailed language name (optional)
264+
context: Message context from msgctxt (optional, e.g., "button", "menu item")
265+
"""
228266
# Use detailed language if provided, otherwise use the short target language code
229267
target_lang_text = detail_language if detail_language else target_language
230268

269+
# Build context prefix if provided (goes at the very beginning)
270+
context_prefix = ""
271+
if context:
272+
context_prefix = (
273+
f"CONTEXT: {context}\n"
274+
f"IMPORTANT: Choose the translation that matches this specific context and usage. "
275+
f"Do not use a literal dictionary translation if the context requires a different word form or meaning.\n\n"
276+
)
277+
231278
if is_bulk:
232279
return (
280+
f"{context_prefix}"
233281
f"Translate the following list of texts from English to {target_lang_text}. "
234282
"Provide only the translations in a JSON array format, maintaining the original order. "
235-
"Each translation should be concise and direct, without explanations or additional context. "
283+
"Each translation should be concise and direct, without explanations. "
236284
"Keep special characters, placeholders, and formatting intact. "
237285
"Do NOT add or remove any leading/trailing whitespace - translate only the text content. "
238286
"If a term should not be translated (like 'URL' or technical terms), keep it as is. "
239287
"Example format: [\"Translation 1\", \"Translation 2\", ...]\n\n"
240288
"Texts to translate:\n"
241289
)
242290
return (
291+
f"{context_prefix}"
243292
f"Translate the following text from English to {target_lang_text}. "
244-
"Return only the direct, word-for-word translation without any explanation or additional context. "
293+
"Return only the direct translation without any explanation. "
245294
"Keep special characters, placeholders, and formatting intact. "
246295
"If a term should not be translated (like 'URL' or technical terms), keep it as is. "
247296
"Here is the text to translate:\n"
@@ -253,10 +302,13 @@ def perform_translation(
253302
texts: Any,
254303
target_language: str,
255304
is_bulk: bool = False,
256-
detail_language: Optional[str] = None) -> Any:
305+
detail_language: Optional[str] = None,
306+
context: Optional[str] = None) -> Any:
257307
"""Performs the actual translation using the selected provider's API."""
258308
logging.debug("Translating to '%s' via %s API", target_language, self.config.provider.value)
259-
prompt = self.get_translation_prompt(target_language, is_bulk, detail_language)
309+
if context:
310+
logging.debug("Using context: %s", context)
311+
prompt = self.get_translation_prompt(target_language, is_bulk, detail_language, context)
260312

261313
# For bulk mode, strip whitespace before sending to AI
262314
if is_bulk:
@@ -802,8 +854,14 @@ def _prepare_translation_request(self, po_file, po_file_path, file_lang, detail_
802854
"""Prepare a translation request from PO file data."""
803855
entries = [entry for entry in po_file if is_entry_untranslated(entry)]
804856
texts = [entry.msgid for entry in entries]
857+
contexts = [entry.msgctxt if hasattr(entry, 'msgctxt') else None for entry in entries]
805858
detail_lang = detail_languages.get(file_lang) if detail_languages else None
806859

860+
# Log context usage
861+
context_count = sum(1 for c in contexts if c)
862+
if context_count > 0:
863+
logging.debug("Found %d entries with msgctxt in %s", context_count, po_file_path)
864+
807865
# Check for and warn about whitespace in msgid
808866
whitespace_entries = [
809867
text for text in texts
@@ -826,7 +884,8 @@ def _prepare_translation_request(self, po_file, po_file_path, file_lang, detail_
826884
texts=texts,
827885
target_language=file_lang,
828886
po_file_path=po_file_path,
829-
detail_language=detail_lang
887+
detail_language=detail_lang,
888+
contexts=contexts
830889
)
831890

832891
def process_po_file(
@@ -887,14 +946,22 @@ def process_po_file(
887946

888947
def _process_batch(self, batch_info, po_file, po_file_path, detail_language=None):
889948
"""Process a single batch of translations."""
890-
batch_texts, batch_entries, current_batch, total_batches, target_language = batch_info
949+
batch_texts, batch_entries, current_batch, total_batches, target_language, batch_contexts = batch_info
891950
translated_count = 0
892951

952+
# Determine most common context for this batch
953+
batch_context = None
954+
if batch_contexts:
955+
non_none_contexts = [c for c in batch_contexts if c]
956+
if non_none_contexts:
957+
from collections import Counter
958+
batch_context = Counter(non_none_contexts).most_common(1)[0][0]
959+
893960
logging.info("[BULK %d/%d] Translating %d entries...", current_batch, total_batches, len(batch_texts))
894961

895962
# Get translations for this batch
896963
translations = self.perform_translation(
897-
batch_texts, target_language, is_bulk=True, detail_language=detail_language
964+
batch_texts, target_language, is_bulk=True, detail_language=detail_language, context=batch_context
898965
)
899966

900967
# Update entries with translations
@@ -918,12 +985,14 @@ def _process_with_incremental_save_bulk(self, request: TranslationRequest):
918985
# Process in batches
919986
for i in range(0, total_entries, self.batch_size):
920987
batch_num = i // self.batch_size + 1
988+
batch_contexts = request.contexts[i:i + self.batch_size] if request.contexts else None
921989
batch_info = (
922990
request.texts[i:i + self.batch_size],
923991
request.entries[i:i + self.batch_size],
924992
batch_num,
925993
total_batches,
926-
request.target_language
994+
request.target_language,
995+
batch_contexts
927996
)
928997

929998
try:
@@ -949,9 +1018,10 @@ def _process_with_incremental_save_single(self, request: TranslationRequest):
9491018

9501019
for i, (text, entry) in enumerate(zip(request.texts, request.entries), 1):
9511020
try:
1021+
context = request.contexts[i - 1] if request.contexts else None
9521022
logging.info("[SINGLE %d/%d] Translating entry...", i, total_entries)
9531023

954-
translation = self.translate_single(text, request.target_language, request.detail_language)
1024+
translation = self.translate_single(text, request.target_language, request.detail_language, context)
9551025

9561026
if translation.strip():
9571027
entry.msgstr = translation
@@ -997,19 +1067,28 @@ def get_translations(
9971067
texts: List[str],
9981068
target_language: str,
9991069
po_file_path: str,
1000-
detail_language: Optional[str] = None) -> List[str]:
1070+
detail_language: Optional[str] = None,
1071+
contexts: Optional[List[Optional[str]]] = None) -> List[str]:
10011072
"""
10021073
Retrieves translations for the given texts using either bulk or individual translation.
1074+
1075+
Args:
1076+
texts: List of texts to translate
1077+
target_language: Target language code
1078+
po_file_path: Path to PO file
1079+
detail_language: Detailed language name (optional)
1080+
contexts: List of msgctxt values for each text (optional)
10031081
"""
10041082
if self.config.flags.bulk_mode:
1005-
return self.translate_bulk(texts, target_language, po_file_path, detail_language)
1083+
return self.translate_bulk(texts, target_language, po_file_path, detail_language, contexts)
10061084

10071085
# Single mode with progress tracking
10081086
translations = []
10091087
total = len(texts)
10101088
for i, text in enumerate(texts, 1):
1089+
context = contexts[i - 1] if contexts else None
10111090
logging.info("[SINGLE %d/%d] Translating entry...", i, total)
1012-
translation = self.translate_single(text, target_language, detail_language)
1091+
translation = self.translate_single(text, target_language, detail_language, context)
10131092
translations.append(translation)
10141093
if i % 10 == 0 or i == total: # Progress update every 10 items or at the end
10151094
logging.info("Progress: %d/%d entries completed (%.1f%%)", i, total, 100.0 * i / total)
@@ -1073,7 +1152,10 @@ def fix_fuzzy_entries(
10731152
logging.info("Found %d fuzzy entries to fix in %s", len(fuzzy_entries), po_file_path)
10741153

10751154
texts_to_translate = [entry.msgid for entry in fuzzy_entries]
1076-
translations = self.get_translations(texts_to_translate, target_language, po_file_path, detail_language)
1155+
fuzzy_contexts = [entry.msgctxt if hasattr(entry, 'msgctxt') else None for entry in fuzzy_entries]
1156+
translations = self.get_translations(
1157+
texts_to_translate, target_language, po_file_path, detail_language, fuzzy_contexts
1158+
)
10771159

10781160
self._update_fuzzy_po_entries(po_file, translations, entries_to_update=fuzzy_entries)
10791161

python_gpt_po/tests/integration/test_real_po_files.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ def test_detail_language_usage(translation_service_openai):
402402
# Create a function that will track the calls to perform_translation
403403
detail_language_was_passed = [False] # Use a list to make it mutable in the nested function
404404

405-
def mock_perform_translation(texts, target_language, is_bulk=False, detail_language=None):
405+
def mock_perform_translation(texts, target_language, is_bulk=False, detail_language=None, context=None):
406406
if detail_language == "French":
407407
detail_language_was_passed[0] = True
408408
return ["Bonjour", "Merci"]

python_gpt_po/tests/test_incremental_save.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_single_mode_keyboard_interrupt(self, mock_translate):
168168
# Mock translations, but raise KeyboardInterrupt after 7 translations
169169
translations_done = []
170170

171-
def translate_with_interrupt(text, lang, detail=None):
171+
def translate_with_interrupt(text, lang, detail_language=None, context=None):
172172
if len(translations_done) >= 7:
173173
raise KeyboardInterrupt("User pressed Ctrl+C")
174174
result = f"Translation {len(translations_done) + 1}"
@@ -213,7 +213,7 @@ def test_bulk_mode_keyboard_interrupt(self, mock_translate):
213213
po_path, po_file = self.create_test_po_file(30)
214214

215215
# Mock translations - interrupt during batch 2 (after batch 1 completes)
216-
def translate_with_interrupt(texts, lang, is_bulk=False, detail_language=None):
216+
def translate_with_interrupt(texts, lang, is_bulk=False, detail_language=None, context=None):
217217
if mock_translate.call_count == 1:
218218
# First batch completes successfully
219219
return [f"Translation {i+1}" for i in range(1, 11)]
@@ -283,7 +283,7 @@ def test_continue_on_error_single_mode(self, mock_translate):
283283
po_path, po_file = self.create_test_po_file(10)
284284

285285
# Mock translations with one failure
286-
def translate_with_error(text, lang, detail=None):
286+
def translate_with_error(text, lang, detail_language=None, context=None):
287287
if "string 5" in text:
288288
raise Exception("API error for string 5")
289289
return f"Translation for {text}"
@@ -325,7 +325,7 @@ def test_continue_on_error_bulk_mode(self, mock_translate):
325325
po_path, po_file = self.create_test_po_file(15)
326326

327327
# Mock translations with batch 2 failing
328-
def translate_with_error(texts, lang, is_bulk=False, detail_language=None):
328+
def translate_with_error(texts, lang, is_bulk=False, detail_language=None, context=None):
329329
if mock_translate.call_count == 2:
330330
raise Exception("API error for batch 2")
331331
return [f"Translation {i+1}" for i in range(len(texts))]

0 commit comments

Comments
 (0)