@@ -101,16 +101,43 @@ def _resolve_model_name(self, requested_model: Optional[str]) -> str:
101101 return self .model
102102
103103 @staticmethod
104- def _requires_thinking_mode (model : str ) -> bool :
105- """Return True if the model requires thinking_config to produce output.
104+ def _thinking_budget_min_for_model (model : str ) -> Optional [int ]:
105+ """Return minimum thinking_budget for models that support/require thinking_config."""
106+ if not isinstance (model , str ):
107+ return None
108+ normalized = model .strip ().lower ()
109+ if not normalized :
110+ return None
111+
112+ # Gemini 3 preview: must be in thinking mode; small budgets still work.
113+ if "gemini-3" in normalized :
114+ return 24
115+
116+ # Gemini 2.5 Pro: supports thinking_config but rejects small budgets.
117+ if "gemini-2.5-pro" in normalized :
118+ return 128
106119
107- Some Gemini 3 preview models only work in thinking mode and may return empty
108- content unless thinking_config is set with a sufficient budget.
120+ return None
121+
122+ @staticmethod
123+ def _min_output_tokens_for_model (model : str ) -> int :
124+ """Return a safe minimum max_output_tokens for models that can otherwise return empty output.
125+
126+ Empirically, some Gemini "pro" / thinking-oriented models may return empty
127+ visible content when max_output_tokens is too small (even for short answers).
109128 """
110129 if not isinstance (model , str ):
111- return False
130+ return 0
112131 normalized = model .strip ().lower ()
113- return "gemini-3" in normalized
132+ if not normalized :
133+ return 0
134+
135+ # Gemini 3 preview (thinking) and Gemini 2.5 Pro frequently need a higher
136+ # output budget to include visible text (otherwise finish_reason=MAX_TOKENS and text=None).
137+ if "gemini-3" in normalized or "gemini-2.5-pro" in normalized :
138+ return 256
139+
140+ return 0
114141
115142 def _apply_thinking_defaults (
116143 self ,
@@ -119,24 +146,23 @@ def _apply_thinking_defaults(
119146 requested_max_tokens : int ,
120147 kwargs : Dict [str , Any ],
121148 ) -> tuple [int , Optional [types .ThinkingConfig ]]:
122- """Apply safe defaults for thinking models to avoid empty outputs."""
123- if not self ._requires_thinking_mode (model ):
124- return requested_max_tokens , None
125-
126- # Empirically, Gemini 3 thinking models may produce empty visible output
127- # when max_output_tokens is too small (even for short answers).
128- # 256 is a practical minimum to avoid truncated outputs for structured JSON.
129- min_output_tokens = 256
149+ """Apply safe defaults for Gemini models to avoid empty outputs."""
130150 max_tokens = requested_max_tokens
131- if max_tokens < min_output_tokens :
151+
152+ min_output_tokens = self ._min_output_tokens_for_model (model )
153+ if min_output_tokens and max_tokens < min_output_tokens :
132154 logger .info (
133- "Gemini thinking model '%s' requested max_tokens=%s; bumping to %s to avoid empty output" ,
155+ "Gemini model '%s' requested max_tokens=%s; bumping to %s to avoid empty output" ,
134156 model ,
135157 requested_max_tokens ,
136158 min_output_tokens ,
137159 )
138160 max_tokens = min_output_tokens
139161
162+ min_thinking_budget = self ._thinking_budget_min_for_model (model )
163+ if min_thinking_budget is None :
164+ return max_tokens , None
165+
140166 # Allow callers to pass an explicit ThinkingConfig or thinking_budget.
141167 thinking_cfg = kwargs .get ("thinking_config" )
142168 if isinstance (thinking_cfg , dict ):
@@ -145,20 +171,37 @@ def _apply_thinking_defaults(
145171 except Exception :
146172 thinking_cfg = None
147173 if isinstance (thinking_cfg , types .ThinkingConfig ):
148- return max_tokens , thinking_cfg
174+ # Normalize invalid/empty budgets for known models.
175+ budget = getattr (thinking_cfg , "thinking_budget" , None )
176+ if budget is None :
177+ budget_int = min_thinking_budget
178+ else :
179+ try :
180+ budget_int = int (budget )
181+ except Exception :
182+ budget_int = min_thinking_budget
183+
184+ if budget_int < min_thinking_budget :
185+ budget_int = min_thinking_budget
186+
187+ return max_tokens , types .ThinkingConfig (
188+ include_thoughts = getattr (thinking_cfg , "include_thoughts" , None ),
189+ thinking_level = getattr (thinking_cfg , "thinking_level" , None ),
190+ thinking_budget = budget_int ,
191+ )
149192
150193 thinking_budget = kwargs .get ("thinking_budget" )
151194 if thinking_budget is None :
152195 # Default tuned to reliably yield visible text output.
153- thinking_budget = 32
196+ thinking_budget = 32 if min_thinking_budget <= 32 else min_thinking_budget
154197 try :
155198 thinking_budget_int = int (thinking_budget )
156199 except Exception :
157- thinking_budget_int = 32
200+ thinking_budget_int = 32 if min_thinking_budget <= 32 else min_thinking_budget
158201
159- # For Gemini 3 preview: budget 0 is invalid; too-small budgets can still yield empty output .
160- if thinking_budget_int < 24 :
161- thinking_budget_int = 24
202+ # Enforce model-specific minimums .
203+ if thinking_budget_int < min_thinking_budget :
204+ thinking_budget_int = min_thinking_budget
162205
163206 return max_tokens , types .ThinkingConfig (thinking_budget = thinking_budget_int )
164207
0 commit comments