@@ -1944,21 +1944,39 @@ def _close_tool_loop_for_thinking(
19441944 # =========================================================================
19451945
19461946 def _get_thinking_config (
1947- self , reasoning_effort : Optional [str ], model : str
1947+ self ,
1948+ reasoning_effort : Optional [str ],
1949+ model : str ,
1950+ explicit_budget : Optional [int ] = None ,
19481951 ) -> Optional [Dict [str , Any ]]:
19491952 """
19501953 Map reasoning_effort to thinking configuration.
19511954
19521955 - Gemini 2.5 & Claude: thinkingBudget (integer tokens)
19531956 - Gemini 3 Pro: thinkingLevel (string: "low"/"high")
19541957 - Gemini 3 Flash: thinkingLevel (string: "minimal"/"low"/"medium"/"high")
1958+
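1959+ If explicit_budget is provided (from Anthropic route), it takes precedence over the
1960+ reasoning_effort mapping for Gemini 2.5 and Claude models only; other models fall through to the reasoning_effort mapping. A budget <= 0 yields thinkingBudget 0 with include_thoughts False. For Claude, explicit budget is capped at 31999.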
19551961 """
19561962 internal = self ._alias_to_internal (model )
19571963 is_gemini_25 = "gemini-2.5" in model
19581964 is_gemini_3 = internal .startswith ("gemini-3-" )
19591965 is_gemini_3_flash = "gemini-3-flash" in model or "gemini-3-flash" in internal
19601966 is_claude = self ._is_claude (model )
19611967
1968+ if not (is_gemini_25 or is_gemini_3 or is_claude ):
1969+ return None
1970+
1971+ # Handle explicit budget from Anthropic route (takes precedence)
1972+ if explicit_budget is not None and (is_gemini_25 or is_claude ):
1973+ if explicit_budget <= 0 :
1974+ return {"thinkingBudget" : 0 , "include_thoughts" : False }
1975+ # Cap Claude budget at max allowed
1976+ if is_claude :
1977+ explicit_budget = min (explicit_budget , CLAUDE_FORCED_THINKING_BUDGET )
1978+ return {"thinkingBudget" : explicit_budget , "include_thoughts" : True }
1979+
19621980 if not (is_gemini_25 or is_gemini_3 or is_claude ):
19631981 return None
19641982
@@ -3756,7 +3774,10 @@ async def acompletion(
37563774 # Gemini 3 performs better with temperature=1 for tool use
37573775 gen_config ["temperature" ] = 1.0
37583776
3759- thinking_config = self ._get_thinking_config (reasoning_effort , model )
3777+ explicit_thinking_budget = kwargs .get ("thinking_budget" )
3778+ thinking_config = self ._get_thinking_config (
3779+ reasoning_effort , model , explicit_thinking_budget
3780+ )
37603781 if thinking_config :
37613782 gen_config .setdefault ("thinkingConfig" , {}).update (thinking_config )
37623783
0 commit comments