Skip to content

Commit 5f58700

Browse files
committed
Refactor GeminiProvider to enhance thinking model support. Introduce methods for determining minimum thinking budgets and output token requirements for specific models. Update logic in _apply_thinking_defaults to ensure safe defaults are applied, preventing empty outputs for Gemini models. Improve handling of thinking configuration to normalize budgets and enforce model-specific minimums.
1 parent e4e93fa commit 5f58700

File tree

1 file changed

+65
-22
lines changed

1 file changed

+65
-22
lines changed

spoon_ai/llm/providers/gemini_provider.py

Lines changed: 65 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -101,16 +101,43 @@ def _resolve_model_name(self, requested_model: Optional[str]) -> str:
101101
return self.model
102102

103103
@staticmethod
104-
def _requires_thinking_mode(model: str) -> bool:
105-
"""Return True if the model requires thinking_config to produce output.
104+
def _thinking_budget_min_for_model(model: str) -> Optional[int]:
105+
"""Return minimum thinking_budget for models that support/require thinking_config."""
106+
if not isinstance(model, str):
107+
return None
108+
normalized = model.strip().lower()
109+
if not normalized:
110+
return None
111+
112+
# Gemini 3 preview: must be in thinking mode; small budgets still work.
113+
if "gemini-3" in normalized:
114+
return 24
115+
116+
# Gemini 2.5 Pro: supports thinking_config but rejects small budgets.
117+
if "gemini-2.5-pro" in normalized:
118+
return 128
106119

107-
Some Gemini 3 preview models only work in thinking mode and may return empty
108-
content unless thinking_config is set with a sufficient budget.
120+
return None
121+
122+
@staticmethod
123+
def _min_output_tokens_for_model(model: str) -> int:
124+
"""Return a safe minimum max_output_tokens for models that can otherwise return empty output.
125+
126+
Empirically, some Gemini "pro" / thinking-oriented models may return empty
127+
visible content when max_output_tokens is too small (even for short answers).
109128
"""
110129
if not isinstance(model, str):
111-
return False
130+
return 0
112131
normalized = model.strip().lower()
113-
return "gemini-3" in normalized
132+
if not normalized:
133+
return 0
134+
135+
# Gemini 3 preview (thinking) and Gemini 2.5 Pro frequently need a higher
136+
# output budget to include visible text (otherwise finish_reason=MAX_TOKENS and text=None).
137+
if "gemini-3" in normalized or "gemini-2.5-pro" in normalized:
138+
return 256
139+
140+
return 0
114141

115142
def _apply_thinking_defaults(
116143
self,
@@ -119,24 +146,23 @@ def _apply_thinking_defaults(
119146
requested_max_tokens: int,
120147
kwargs: Dict[str, Any],
121148
) -> tuple[int, Optional[types.ThinkingConfig]]:
122-
"""Apply safe defaults for thinking models to avoid empty outputs."""
123-
if not self._requires_thinking_mode(model):
124-
return requested_max_tokens, None
125-
126-
# Empirically, Gemini 3 thinking models may produce empty visible output
127-
# when max_output_tokens is too small (even for short answers).
128-
# 256 is a practical minimum to avoid truncated outputs for structured JSON.
129-
min_output_tokens = 256
149+
"""Apply safe defaults for Gemini models to avoid empty outputs."""
130150
max_tokens = requested_max_tokens
131-
if max_tokens < min_output_tokens:
151+
152+
min_output_tokens = self._min_output_tokens_for_model(model)
153+
if min_output_tokens and max_tokens < min_output_tokens:
132154
logger.info(
133-
"Gemini thinking model '%s' requested max_tokens=%s; bumping to %s to avoid empty output",
155+
"Gemini model '%s' requested max_tokens=%s; bumping to %s to avoid empty output",
134156
model,
135157
requested_max_tokens,
136158
min_output_tokens,
137159
)
138160
max_tokens = min_output_tokens
139161

162+
min_thinking_budget = self._thinking_budget_min_for_model(model)
163+
if min_thinking_budget is None:
164+
return max_tokens, None
165+
140166
# Allow callers to pass an explicit ThinkingConfig or thinking_budget.
141167
thinking_cfg = kwargs.get("thinking_config")
142168
if isinstance(thinking_cfg, dict):
@@ -145,20 +171,37 @@ def _apply_thinking_defaults(
145171
except Exception:
146172
thinking_cfg = None
147173
if isinstance(thinking_cfg, types.ThinkingConfig):
148-
return max_tokens, thinking_cfg
174+
# Normalize invalid/empty budgets for known models.
175+
budget = getattr(thinking_cfg, "thinking_budget", None)
176+
if budget is None:
177+
budget_int = min_thinking_budget
178+
else:
179+
try:
180+
budget_int = int(budget)
181+
except Exception:
182+
budget_int = min_thinking_budget
183+
184+
if budget_int < min_thinking_budget:
185+
budget_int = min_thinking_budget
186+
187+
return max_tokens, types.ThinkingConfig(
188+
include_thoughts=getattr(thinking_cfg, "include_thoughts", None),
189+
thinking_level=getattr(thinking_cfg, "thinking_level", None),
190+
thinking_budget=budget_int,
191+
)
149192

150193
thinking_budget = kwargs.get("thinking_budget")
151194
if thinking_budget is None:
152195
# Default tuned to reliably yield visible text output.
153-
thinking_budget = 32
196+
thinking_budget = 32 if min_thinking_budget <= 32 else min_thinking_budget
154197
try:
155198
thinking_budget_int = int(thinking_budget)
156199
except Exception:
157-
thinking_budget_int = 32
200+
thinking_budget_int = 32 if min_thinking_budget <= 32 else min_thinking_budget
158201

159-
# For Gemini 3 preview: budget 0 is invalid; too-small budgets can still yield empty output.
160-
if thinking_budget_int < 24:
161-
thinking_budget_int = 24
202+
# Enforce model-specific minimums.
203+
if thinking_budget_int < min_thinking_budget:
204+
thinking_budget_int = min_thinking_budget
162205

163206
return max_tokens, types.ThinkingConfig(thinking_budget=thinking_budget_int)
164207

0 commit comments

Comments (0)