Skip to content

Commit 9c858ac

Browse files
hydropixclaude
and committed
fix: resolve issue #105 - Gemini extraction failures due to truncated responses
Remove hardcoded maxOutputTokens: 2048 that was truncating Gemini responses, cutting off the closing </TRANSLATION> tag. Let Gemini manage its own output limits like other cloud providers (Mistral, DeepSeek, OpenRouter). Also adds finishReason: MAX_TOKENS detection and markdown code block stripping. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ae0961b commit 9c858ac

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

src/core/llm/providers/gemini.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
138138
}]
139139
}],
140140
"generationConfig": {
141-
"temperature": 0.7,
142-
"maxOutputTokens": 2048
141+
"temperature": 0.7
143142
}
144143
}
145144

@@ -165,11 +164,18 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
165164
response_json = response.json()
166165
# Extract text from Gemini response structure
167166
response_text = ""
167+
was_truncated = False
168168
if "candidates" in response_json and response_json["candidates"]:
169-
content = response_json["candidates"][0].get("content", {})
169+
candidate = response_json["candidates"][0]
170+
content = candidate.get("content", {})
170171
parts = content.get("parts", [])
171172
if parts:
172173
response_text = parts[0].get("text", "")
174+
# Detect truncation via finishReason
175+
finish_reason = candidate.get("finishReason", "")
176+
if finish_reason == "MAX_TOKENS":
177+
was_truncated = True
178+
print(f"⚠️ Gemini response was truncated (finishReason: MAX_TOKENS)")
173179

174180
# Extract token usage if available
175181
usage_metadata = response_json.get("usageMetadata", {})
@@ -182,7 +188,7 @@ async def generate(self, prompt: str, timeout: int = REQUEST_TIMEOUT,
182188
completion_tokens=completion_tokens,
183189
context_used=prompt_tokens + completion_tokens,
184190
context_limit=0, # Gemini manages context internally
185-
was_truncated=False
191+
was_truncated=was_truncated
186192
)
187193

188194
except httpx.TimeoutException as e:

src/core/llm/utils/extraction.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ def extract(self, response: str) -> Optional[str]:
7878
# Remove all <think>...</think> blocks completely
7979
response = self._remove_think_blocks(response)
8080

81+
# Remove markdown code block wrappers (some providers like Gemini may wrap in ```)
82+
response = self._remove_markdown_code_blocks(response)
83+
8184
response = response.strip()
8285

8386
if len(response) < original_length:
@@ -110,6 +113,25 @@ def extract(self, response: str) -> Optional[str]:
110113
# No tags found at all
111114
return None
112115

116+
def _remove_markdown_code_blocks(self, response: str) -> str:
117+
"""
118+
Remove markdown code block wrappers from response.
119+
120+
Some providers (notably Gemini) may wrap responses in markdown code blocks
121+
like ```xml\\n...\\n``` which prevents tag extraction.
122+
123+
Args:
124+
response: Text potentially wrapped in markdown code blocks
125+
126+
Returns:
127+
Text with markdown code block wrappers removed
128+
"""
129+
# Match ```lang\n...\n``` wrapping the entire response
130+
match = re.match(r'^```\w*\s*\n(.*?)\n```\s*$', response, re.DOTALL)
131+
if match:
132+
return match.group(1)
133+
return response
134+
113135
def _remove_think_blocks(self, response: str) -> str:
114136
"""
115137
Remove all <think>...</think> blocks from response.

0 commit comments

Comments (0)