Skip to content

Commit db1cbbe

Browse files
Authored — Merge pull request #220 from veithly/fix/gemini
Refactor GeminiProvider to enhance thinking model support.
2 parents 63e008e + 5f58700 commit db1cbbe

File tree

1 file changed

+65
-22
lines changed

1 file changed

+65
-22
lines changed

spoon_ai/llm/providers/gemini_provider.py

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -101,16 +101,43 @@ def _resolve_model_name(self, requested_model: Optional[str]) -> str:
101101
return self.model
102102

103103
@staticmethod
104-
def _requires_thinking_mode(model: str) -> bool:
105-
"""Return True if the model requires thinking_config to produce output.
104+
def _thinking_budget_min_for_model(model: str) -> Optional[int]:
105+
"""Return minimum thinking_budget for models that support/require thinking_config."""
106+
if not isinstance(model, str):
107+
return None
108+
normalized = model.strip().lower()
109+
if not normalized:
110+
return None
111+
112+
# Gemini 3 preview: must be in thinking mode; small budgets still work.
113+
if "gemini-3" in normalized:
114+
return 24
115+
116+
# Gemini 2.5 Pro: supports thinking_config but rejects small budgets.
117+
if "gemini-2.5-pro" in normalized:
118+
return 128
106119

107-
Some Gemini 3 preview models only work in thinking mode and may return empty
108-
content unless thinking_config is set with a sufficient budget.
120+
return None
121+
122+
@staticmethod
123+
def _min_output_tokens_for_model(model: str) -> int:
124+
"""Return a safe minimum max_output_tokens for models that can otherwise return empty output.
125+
126+
Empirically, some Gemini "pro" / thinking-oriented models may return empty
127+
visible content when max_output_tokens is too small (even for short answers).
109128
"""
110129
if not isinstance(model, str):
111-
return False
130+
return 0
112131
normalized = model.strip().lower()
113-
return "gemini-3" in normalized
132+
if not normalized:
133+
return 0
134+
135+
# Gemini 3 preview (thinking) and Gemini 2.5 Pro frequently need a higher
136+
# output budget to include visible text (otherwise finish_reason=MAX_TOKENS and text=None).
137+
if "gemini-3" in normalized or "gemini-2.5-pro" in normalized:
138+
return 256
139+
140+
return 0
114141

115142
def _apply_thinking_defaults(
116143
self,
@@ -119,24 +146,23 @@ def _apply_thinking_defaults(
119146
requested_max_tokens: int,
120147
kwargs: Dict[str, Any],
121148
) -> tuple[int, Optional[types.ThinkingConfig]]:
122-
"""Apply safe defaults for thinking models to avoid empty outputs."""
123-
if not self._requires_thinking_mode(model):
124-
return requested_max_tokens, None
125-
126-
# Empirically, Gemini 3 thinking models may produce empty visible output
127-
# when max_output_tokens is too small (even for short answers).
128-
# 256 is a practical minimum to avoid truncated outputs for structured JSON.
129-
min_output_tokens = 256
149+
"""Apply safe defaults for Gemini models to avoid empty outputs."""
130150
max_tokens = requested_max_tokens
131-
if max_tokens < min_output_tokens:
151+
152+
min_output_tokens = self._min_output_tokens_for_model(model)
153+
if min_output_tokens and max_tokens < min_output_tokens:
132154
logger.info(
133-
"Gemini thinking model '%s' requested max_tokens=%s; bumping to %s to avoid empty output",
155+
"Gemini model '%s' requested max_tokens=%s; bumping to %s to avoid empty output",
134156
model,
135157
requested_max_tokens,
136158
min_output_tokens,
137159
)
138160
max_tokens = min_output_tokens
139161

162+
min_thinking_budget = self._thinking_budget_min_for_model(model)
163+
if min_thinking_budget is None:
164+
return max_tokens, None
165+
140166
# Allow callers to pass an explicit ThinkingConfig or thinking_budget.
141167
thinking_cfg = kwargs.get("thinking_config")
142168
if isinstance(thinking_cfg, dict):
@@ -145,20 +171,37 @@ def _apply_thinking_defaults(
145171
except Exception:
146172
thinking_cfg = None
147173
if isinstance(thinking_cfg, types.ThinkingConfig):
148-
return max_tokens, thinking_cfg
174+
# Normalize invalid/empty budgets for known models.
175+
budget = getattr(thinking_cfg, "thinking_budget", None)
176+
if budget is None:
177+
budget_int = min_thinking_budget
178+
else:
179+
try:
180+
budget_int = int(budget)
181+
except Exception:
182+
budget_int = min_thinking_budget
183+
184+
if budget_int < min_thinking_budget:
185+
budget_int = min_thinking_budget
186+
187+
return max_tokens, types.ThinkingConfig(
188+
include_thoughts=getattr(thinking_cfg, "include_thoughts", None),
189+
thinking_level=getattr(thinking_cfg, "thinking_level", None),
190+
thinking_budget=budget_int,
191+
)
149192

150193
thinking_budget = kwargs.get("thinking_budget")
151194
if thinking_budget is None:
152195
# Default tuned to reliably yield visible text output.
153-
thinking_budget = 32
196+
thinking_budget = 32 if min_thinking_budget <= 32 else min_thinking_budget
154197
try:
155198
thinking_budget_int = int(thinking_budget)
156199
except Exception:
157-
thinking_budget_int = 32
200+
thinking_budget_int = 32 if min_thinking_budget <= 32 else min_thinking_budget
158201

159-
# For Gemini 3 preview: budget 0 is invalid; too-small budgets can still yield empty output.
160-
if thinking_budget_int < 24:
161-
thinking_budget_int = 24
202+
# Enforce model-specific minimums.
203+
if thinking_budget_int < min_thinking_budget:
204+
thinking_budget_int = min_thinking_budget
162205

163206
return max_tokens, types.ThinkingConfig(thinking_budget=thinking_budget_int)
164207

0 commit comments

Comments (0)