Skip to content

Commit 1bd01f4

Browse files
committed
Update inference.py
1 parent aad3062 commit 1bd01f4

File tree

1 file changed

+63
-59
lines changed

1 file changed

+63
-59
lines changed

optillm/inference.py

Lines changed: 63 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -1591,64 +1591,68 @@ def parse_model_string(model: str) -> ModelConfig:
15911591
dynamic_temperature=False,
15921592
)
15931593

1594-
# Low Reasoning Effort
1595-
# Suitable for:
1596-
# - Simple, straightforward questions
1597-
# - Quick clarifications
1598-
# - Well-defined tasks with clear steps
1599-
LOW_EFFORT = {
1600-
"min_thinking_tokens": 256, # ~100-200 words minimum
1601-
"max_thinking_tokens": 512, # ~200-400 words maximum
1602-
"max_thoughts": 2, # Allow only one alternative perspective
1603-
"thought_switch_tokens": [
1604-
"However,", # Single alternative consideration
1605-
"Wait,",
1606-
"Alternatively,",
1607-
],
1608-
"prefill": "Let me think about this briefly..."
1609-
}
1610-
1611-
# Medium Reasoning Effort
1612-
# Suitable for:
1613-
# - Moderate complexity problems
1614-
# - Analysis requiring multiple perspectives
1615-
# - Tasks needing detailed explanation
1616-
MEDIUM_EFFORT = {
1617-
"min_thinking_tokens": 512, # ~200-400 words minimum
1618-
"max_thinking_tokens": 1024, # ~400-800 words maximum
1619-
"max_thoughts": 4, # Allow multiple perspective shifts
1620-
"thought_switch_tokens": [
1621-
"Additionally,",
1622-
"Alternatively,",
1623-
"However,",
1624-
"Wait,",
1625-
],
1626-
"prefill": "Let me analyze this from multiple angles..."
1627-
}
1628-
1629-
# High Reasoning Effort
1630-
# Suitable for:
1631-
# - Complex problem solving
1632-
# - Deep analysis tasks
1633-
# - Multi-step reasoning chains
1634-
HIGH_EFFORT = {
1635-
"min_thinking_tokens": 1024, # ~400-800 words minimum
1636-
"max_thinking_tokens": 2048, # ~800-1600 words maximum
1637-
"max_thoughts": 6, # Allow extensive exploration
1638-
"thought_switch_tokens": [
1639-
"Additionally,",
1640-
"Alternatively,",
1641-
"However,",
1642-
"Wait,",
1643-
],
1644-
"prefill": "This requires careful analysis. Let me think through it systematically..."
1645-
}
1646-
1647-
def get_effort_profile(effort_level: str) -> dict:
1648-
"""Get reasoning effort profile based on specified level."""
1594+
def get_effort_profile(reasoning_effort: str, max_tokens: int = 4096) -> dict:
1595+
"""Get reasoning effort profile based on specified level and max tokens.
1596+
1597+
Args:
1598+
reasoning_effort: 'low', 'medium', or 'high'
1599+
max_tokens: Maximum tokens allowed for generation, defaults to 4096
1600+
1601+
Returns:
1602+
dict: Configuration for the specified reasoning effort level
1603+
"""
1604+
# Base profiles with percentages and thought counts
16491605
profiles = {
1650-
"low": LOW_EFFORT,
1651-
"medium": MEDIUM_EFFORT,
1652-
"high": HIGH_EFFORT
1606+
"low": {
1607+
"min_tokens_pct": 0.25, # 25% of max_tokens
1608+
"max_tokens_pct": 0.33, # 33% of max_tokens
1609+
"max_thoughts": 4,
1610+
"thought_switch_tokens": [
1611+
"However,",
1612+
"Additionally,"
1613+
],
1614+
"prefill": "Let me think about this briefly..."
1615+
},
1616+
"medium": {
1617+
"min_tokens_pct": 0.33, # 33% of max_tokens
1618+
"max_tokens_pct": 0.66, # 66% of max_tokens
1619+
"max_thoughts": 16,
1620+
"thought_switch_tokens": [
1621+
"Additionally,",
1622+
"Alternatively,",
1623+
"However,",
1624+
"Wait,"
1625+
],
1626+
"prefill": "Let me analyze this from multiple angles..."
1627+
},
1628+
"high": {
1629+
"min_tokens_pct": 0.66, # 66% of max_tokens
1630+
"max_tokens_pct": 0.90, # 90% of max_tokens
1631+
"max_thoughts": 32,
1632+
"thought_switch_tokens": [
1633+
"Additionally,",
1634+
"Alternatively,",
1635+
"However,",
1636+
"Wait,"
1637+
],
1638+
"prefill": "This requires careful analysis. Let me think through it systematically..."
1639+
}
1640+
}
1641+
1642+
# Get base profile or default to medium
1643+
profile = profiles.get(reasoning_effort.lower(), profiles["low"])
1644+
1645+
# Calculate actual token limits based on max_tokens
1646+
min_thinking_tokens = int(max_tokens * profile["min_tokens_pct"])
1647+
max_thinking_tokens = int(max_tokens * profile["max_tokens_pct"])
1648+
1649+
# Create final config
1650+
config = {
1651+
"min_thinking_tokens": min_thinking_tokens,
1652+
"max_thinking_tokens": max_thinking_tokens,
1653+
"max_thoughts": profile["max_thoughts"],
1654+
"thought_switch_tokens": profile["thought_switch_tokens"],
1655+
"prefill": profile["prefill"]
16531656
}
1654-
return profiles.get(effort_level, LOW_EFFORT)
1657+
1658+
return config

0 commit comments

Comments
 (0)