Skip to content

Commit 9c858ac

Browse files
hydropixclaude
and committed
fix: resolve issue #105 - Gemini extraction failures due to truncated responses
Remove hardcoded maxOutputTokens: 2048 that was truncating Gemini responses, cutting off the closing </TRANSLATION> tag. Let Gemini manage its own output limits like other cloud providers (Mistral, DeepSeek, OpenRouter). Also adds finishReason: MAX_TOKENS detection and markdown code block stripping. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ae0961b commit 9c858ac

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

src/core/llm/providers/gemini.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
138138
}]
139139
}],
140140
"generationConfig": {
141-
"temperature": 0.7,
142-
"maxOutputTokens": 2048
141+
"temperature": 0.7
143142
}
144143
}
145144

@@ -165,11 +164,18 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
165164
response_json = response.json()
166165
# Extract text from Gemini response structure
167166
response_text = ""
167+
was_truncated = False
168168
if "candidates" in response_json and response_json["candidates"]:
169-
content = response_json["candidates"][0].get("content", {})
169+
candidate = response_json["candidates"][0]
170+
content = candidate.get("content", {})
170171
parts = content.get("parts", [])
171172
if parts:
172173
response_text = parts[0].get("text", "")
174+
# Detect truncation via finishReason
175+
finish_reason = candidate.get("finishReason", "")
176+
if finish_reason == "MAX_TOKENS":
177+
was_truncated = True
178+
print(f"⚠️ Gemini response was truncated (finishReason: MAX_TOKENS)")
173179

174180
# Extract token usage if available
175181
usage_metadata = response_json.get("usageMetadata", {})
@@ -182,7 +188,7 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
182188
completion_tokens=completion_tokens,
183189
context_used=prompt_tokens + completion_tokens,
184190
context_limit=0, # Gemini manages context internally
185-
was_truncated=False
191+
was_truncated=was_truncated
186192
)
187193

188194
except httpx.TimeoutException as e:

src/core/llm/utils/extraction.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ def extract(self, response: str) -> Optional[str]:
7878
# Remove all <think>...</think> blocks completely
7979
response = self._remove_think_blocks(response)
8080

81+
# Remove markdown code block wrappers (some providers like Gemini may wrap in ```)
82+
response = self._remove_markdown_code_blocks(response)
83+
8184
response = response.strip()
8285

8386
if len(response) < original_length:
@@ -110,6 +113,25 @@ def extract(self, response: str) -> Optional[str]:
110113
# No tags found at all
111114
return None
112115

116+
def _remove_markdown_code_blocks(self, response: str) -> str:
117+
"""
118+
Remove markdown code block wrappers from response.
119+
120+
Some providers (notably Gemini) may wrap responses in markdown code blocks
121+
like ```xml\\n...\\n``` which prevents tag extraction.
122+
123+
Args:
124+
response: Text potentially wrapped in markdown code blocks
125+
126+
Returns:
127+
Text with markdown code block wrappers removed
128+
"""
129+
# Match ```lang\n...\n``` wrapping the entire response
130+
match = re.match(r'^```\w*\s*\n(.*?)\n```\s*$', response, re.DOTALL)
131+
if match:
132+
return match.group(1)
133+
return response
134+
113135
def _remove_think_blocks(self, response: str) -> str:
114136
"""
115137
Remove all <think>...</think> blocks from response.

0 commit comments

Comments (0)