Skip to content

Commit 750ed09

Browse files
committed.
1 parent aff0476 commit 750ed09

File tree

4 files changed

+319
-70
lines changed

4 files changed

+319
-70
lines changed

src/core/llm_client.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@
44
from typing import Optional, Dict, Any
55

66
from src.config import API_ENDPOINT, DEFAULT_MODEL
7-
from src.core.llm_providers import create_llm_provider, LLMProvider
7+
from src.core.llm_providers import create_llm_provider, LLMProvider, ContextOverflowError
8+
9+
# Re-export for convenience
10+
__all__ = ['LLMClient', 'default_client', 'create_llm_client', 'ContextOverflowError']
811

912

1013
class LLMClient:

src/core/llm_providers.py

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -338,9 +338,22 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
338338
continue
339339
return None
340340
except httpx.HTTPStatusError as e:
341-
print(f"OpenAI API HTTP Error (attempt {attempt + 1}/{MAX_TRANSLATION_ATTEMPTS}): {e}")
341+
error_message = str(e)
342+
error_body = ""
342343
if hasattr(e, 'response') and hasattr(e.response, 'text'):
343-
print(f"Response details: Status {e.response.status_code}, Body: {e.response.text[:500]}...")
344+
error_body = e.response.text[:500]
345+
error_message = f"{e} - {error_body}"
346+
347+
print(f"OpenAI API HTTP Error (attempt {attempt + 1}/{MAX_TRANSLATION_ATTEMPTS}): {e}")
348+
if error_body:
349+
print(f"Response details: Status {e.response.status_code}, Body: {error_body}...")
350+
351+
# Detect context overflow errors (OpenAI uses "context_length_exceeded" or similar)
352+
context_overflow_keywords = ["context_length", "maximum context", "token limit",
353+
"too many tokens", "reduce the length", "max_tokens"]
354+
if any(keyword in error_message.lower() for keyword in context_overflow_keywords):
355+
raise ContextOverflowError(f"OpenAI context overflow: {error_message}")
356+
344357
if attempt < MAX_TRANSLATION_ATTEMPTS - 1:
345358
await asyncio.sleep(RETRY_DELAY_SECONDS)
346359
continue
@@ -606,8 +619,10 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
606619
return None
607620
except httpx.HTTPStatusError as e:
608621
error_body = ""
622+
error_message = str(e)
609623
if hasattr(e, 'response') and hasattr(e.response, 'text'):
610624
error_body = e.response.text[:500]
625+
error_message = f"{e} - {error_body}"
611626

612627
# Parse OpenRouter specific error messages
613628
if e.response.status_code == 404:
@@ -622,6 +637,13 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
622637
print(f"OpenRouter API HTTP Error (attempt {attempt + 1}/{MAX_TRANSLATION_ATTEMPTS}): {e}")
623638
print(f"Response details: Status {e.response.status_code}, Body: {error_body}...")
624639

640+
# Detect context overflow errors
641+
context_overflow_keywords = ["context_length", "maximum context", "token limit",
642+
"too many tokens", "reduce the length", "max_tokens",
643+
"context window", "exceeds"]
644+
if any(keyword in error_message.lower() for keyword in context_overflow_keywords):
645+
raise ContextOverflowError(f"OpenRouter context overflow: {error_message}")
646+
625647
if attempt < MAX_TRANSLATION_ATTEMPTS - 1:
626648
await asyncio.sleep(RETRY_DELAY_SECONDS)
627649
continue
@@ -771,9 +793,22 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
771793
continue
772794
return None
773795
except httpx.HTTPStatusError as e:
774-
print(f"Gemini API HTTP Error (attempt {attempt + 1}/{MAX_TRANSLATION_ATTEMPTS}): {e}")
796+
error_message = str(e)
797+
error_body = ""
775798
if hasattr(e, 'response') and hasattr(e.response, 'text'):
776-
print(f"Response details: Status {e.response.status_code}, Body: {e.response.text[:200]}...")
799+
error_body = e.response.text[:500]
800+
error_message = f"{e} - {error_body}"
801+
802+
print(f"Gemini API HTTP Error (attempt {attempt + 1}/{MAX_TRANSLATION_ATTEMPTS}): {e}")
803+
if error_body:
804+
print(f"Response details: Status {e.response.status_code}, Body: {error_body[:200]}...")
805+
806+
# Detect context overflow errors (Gemini uses "RESOURCE_EXHAUSTED" or token limits)
807+
context_overflow_keywords = ["resource_exhausted", "token limit", "input too long",
808+
"maximum input", "context length", "too many tokens"]
809+
if any(keyword in error_message.lower() for keyword in context_overflow_keywords):
810+
raise ContextOverflowError(f"Gemini context overflow: {error_message}")
811+
777812
if attempt < MAX_TRANSLATION_ATTEMPTS - 1:
778813
await asyncio.sleep(RETRY_DELAY_SECONDS)
779814
continue

src/core/subtitle_translator.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,8 @@ async def translate_subtitles(subtitles: List[Dict[str, str]], source_language:
9797
)
9898

9999
if translated_text is not None:
100-
translations[idx] = translated_text
100+
# Single point of cleaning for subtitles
101+
translations[idx] = clean_translated_text(translated_text)
101102
completed_count += 1
102103
else:
103104
# Keep original text if translation fails

0 commit comments

Comments (0)