fix: disable Gemini thinking mode for 6x faster translation + fuzzy tag extraction

hydropix · claude · hydropix · commit a821863876a5 · 2026-02-13T23:25:10.000+01:00
- Disable thinking for Gemini 2.5 models (thinkingBudget: 0), reducing
  response time from ~10s to ~1.5s per chunk
- Add fuzzy closing-tag matching to handle malformed tags like </TRANATION>
- Remove the progress_callback argument from the CLI call; translate_file() does not accept it

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/src/core/llm/providers/gemini.py b/src/core/llm/providers/gemini.py
@@ -57,6 +57,16 @@ def __init__(self, api_key: str, model: str = "gemini-2.0-flash"):
         self.api_key = api_key
         self.api_endpoint = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
 
+    def _is_thinking_model(self) -> bool:
+        """Check if the current model supports thinking mode (Gemini 2.5+)."""
+        return "2.5" in self.model
+
+    def _get_thinking_config(self) -> dict:
+        """Return thinkingConfig to disable thinking for supported models."""
+        if self._is_thinking_model():
+            return {"thinkingConfig": {"thinkingBudget": 0}}
+        return {}
+
     async def get_available_models(self) -> list[dict]:
         """
         Fetch available Gemini models from API, excluding experimental/vision models.
@@ -138,7 +148,8 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
                 }]
             }],
             "generationConfig": {
-                "temperature": 0.7
+                "temperature": 0.7,
+                **self._get_thinking_config()
             }
         }
 
diff --git a/src/core/llm/utils/extraction.py b/src/core/llm/utils/extraction.py
@@ -110,6 +110,25 @@ def extract(self, response: str) -> Optional[str]:
 
             return extracted
 
+        # FUZZY FALLBACK: Opening tag found but closing tag is malformed
+        # Some models (e.g. Gemini without thinking) write </TRANATION> instead of </TRANSLATION>
+        if starts_correctly:
+            # Extract tag name from closing tag (e.g. "TRANSLATION" from "</TRANSLATION>")
+            closing_tag_match = re.match(r'</(\w+)>', self._tag_out)
+            if closing_tag_match:
+                tag_name = closing_tag_match.group(1)
+                # Look for any closing tag that starts with the same prefix (at least 3 chars)
+                prefix = tag_name[:3]
+                fuzzy_pattern = re.compile(
+                    rf'{re.escape(self._tag_in)}(.*?)</\w*{re.escape(prefix)}\w*>',
+                    re.DOTALL
+                )
+                fuzzy_match = fuzzy_pattern.search(response)
+                if fuzzy_match:
+                    extracted = fuzzy_match.group(1).strip()
+                    print(f"[WARN] Fuzzy tag match: closing tag was malformed, extracted content using prefix '</{prefix}...'")
+                    return extracted
+
         # No tags found at all
         return None
 
diff --git a/translate.py b/translate.py
@@ -135,7 +135,6 @@ def stats_callback(stats: dict):
             llm_provider=args.provider,
             checkpoint_manager=checkpoint_manager,
             translation_id=translation_id,
-            progress_callback=None,
             log_callback=log_callback,
             stats_callback=stats_callback,
             check_interruption_callback=None,