@@ -1591,64 +1591,68 @@ def parse_model_string(model: str) -> ModelConfig:
15911591 dynamic_temperature = False ,
15921592 )
15931593
1594- # Low Reasoning Effort
1595- # Suitable for:
1596- # - Simple, straightforward questions
1597- # - Quick clarifications
1598- # - Well-defined tasks with clear steps
1599- LOW_EFFORT = {
1600- "min_thinking_tokens" : 256 , # ~100-200 words minimum
1601- "max_thinking_tokens" : 512 , # ~200-400 words maximum
1602- "max_thoughts" : 2 , # Allow only one alternative perspective
1603- "thought_switch_tokens" : [
1604- "However," , # Single alternative consideration
1605- "Wait," ,
1606- "Alternatively," ,
1607- ],
1608- "prefill" : "Let me think about this briefly..."
1609- }
1610-
1611- # Medium Reasoning Effort
1612- # Suitable for:
1613- # - Moderate complexity problems
1614- # - Analysis requiring multiple perspectives
1615- # - Tasks needing detailed explanation
1616- MEDIUM_EFFORT = {
1617- "min_thinking_tokens" : 512 , # ~200-400 words minimum
1618- "max_thinking_tokens" : 1024 , # ~400-800 words maximum
1619- "max_thoughts" : 4 , # Allow multiple perspective shifts
1620- "thought_switch_tokens" : [
1621- "Additionally," ,
1622- "Alternatively," ,
1623- "However," ,
1624- "Wait," ,
1625- ],
1626- "prefill" : "Let me analyze this from multiple angles..."
1627- }
1628-
1629- # High Reasoning Effort
1630- # Suitable for:
1631- # - Complex problem solving
1632- # - Deep analysis tasks
1633- # - Multi-step reasoning chains
1634- HIGH_EFFORT = {
1635- "min_thinking_tokens" : 1024 , # ~400-800 words minimum
1636- "max_thinking_tokens" : 2048 , # ~800-1600 words maximum
1637- "max_thoughts" : 6 , # Allow extensive exploration
1638- "thought_switch_tokens" : [
1639- "Additionally," ,
1640- "Alternatively," ,
1641- "However," ,
1642- "Wait," ,
1643- ],
1644- "prefill" : "This requires careful analysis. Let me think through it systematically..."
1645- }
1646-
def get_effort_profile(reasoning_effort: str, max_tokens: int = 4096) -> dict:
    """Build the reasoning-effort configuration for a given level and budget.

    Each effort level defines its thinking-token window as a fraction of
    ``max_tokens``, so the limits scale with the generation budget instead
    of being fixed constants.

    Args:
        reasoning_effort: 'low', 'medium', or 'high' (case-insensitive).
            Any other value, including ``None`` or a non-string, selects
            the 'low' profile.
        max_tokens: Maximum tokens allowed for generation. ``None`` is
            treated as the default of 4096; negative values are treated
            as 0.

    Returns:
        dict: A configuration with the keys ``min_thinking_tokens``,
        ``max_thinking_tokens``, ``max_thoughts``,
        ``thought_switch_tokens`` and ``prefill``.
    """
    default_max_tokens = 4096

    # Base profiles: token window as fractions of max_tokens, plus the
    # number of allowed thoughts and the phrases used to switch thoughts.
    profiles = {
        "low": {
            "min_tokens_pct": 0.25,  # 25% of max_tokens
            "max_tokens_pct": 0.33,  # 33% of max_tokens
            "max_thoughts": 4,
            "thought_switch_tokens": [
                "However,",
                "Additionally,",
            ],
            "prefill": "Let me think about this briefly...",
        },
        "medium": {
            "min_tokens_pct": 0.33,  # 33% of max_tokens
            "max_tokens_pct": 0.66,  # 66% of max_tokens
            "max_thoughts": 16,
            "thought_switch_tokens": [
                "Additionally,",
                "Alternatively,",
                "However,",
                "Wait,",
            ],
            "prefill": "Let me analyze this from multiple angles...",
        },
        "high": {
            "min_tokens_pct": 0.66,  # 66% of max_tokens
            "max_tokens_pct": 0.90,  # 90% of max_tokens
            "max_thoughts": 32,
            "thought_switch_tokens": [
                "Additionally,",
                "Alternatively,",
                "However,",
                "Wait,",
            ],
            "prefill": "This requires careful analysis. Let me think through it systematically...",
        },
    }

    # Get the base profile, defaulting to "low" for anything unrecognised.
    # Non-string input (e.g. None) must not crash on .lower(); it takes the
    # same fallback path as an unknown level name.
    level = reasoning_effort.lower() if isinstance(reasoning_effort, str) else ""
    profile = profiles.get(level, profiles["low"])

    # A missing budget falls back to the default, and a negative budget is
    # clamped so the derived limits can never be negative.
    if max_tokens is None:
        max_tokens = default_max_tokens
    budget = max(max_tokens, 0)

    # int() truncates, so the limits never exceed the stated percentage.
    return {
        "min_thinking_tokens": int(budget * profile["min_tokens_pct"]),
        "max_thinking_tokens": int(budget * profile["max_tokens_pct"]),
        "max_thoughts": profile["max_thoughts"],
        "thought_switch_tokens": profile["thought_switch_tokens"],
        "prefill": profile["prefill"],
    }
0 commit comments