Commit 7630298

Merge pull request #260 from codelion/fix-reasoning-effort

Fix reasoning effort

Previously, reasoning_effort reached the API only when a caller passed it explicitly per request. This merge makes it a configuration option at both the LLM and per-model level, has the OpenAI client fall back to the configured value, adds a test suite for the new behavior, and bumps the version to 0.2.13.

2 parents: 139cbc7 + 2495dd3

File tree: 4 files changed (+227, -5 lines)

openevolve/_version.py
Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.12"
+__version__ = "0.2.13"

openevolve/config.py
Lines changed: 8 additions & 0 deletions

@@ -38,6 +38,9 @@ class LLMModelConfig:
 
     # Reproducibility
     random_seed: Optional[int] = None
+
+    # Reasoning parameters
+    reasoning_effort: Optional[str] = None
 
 
 @dataclass
@@ -69,6 +72,9 @@ class LLMConfig(LLMModelConfig):
     primary_model_weight: float = None
     secondary_model: str = None
     secondary_model_weight: float = None
+
+    # Reasoning parameters (inherited from LLMModelConfig but can be overridden)
+    reasoning_effort: Optional[str] = None
 
     def __post_init__(self):
         """Post-initialization to set up model configurations"""
@@ -121,6 +127,7 @@ def __post_init__(self):
             "retries": self.retries,
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
+            "reasoning_effort": self.reasoning_effort,
         }
         self.update_model_params(shared_config)
 
@@ -173,6 +180,7 @@ def rebuild_models(self) -> None:
             "retries": self.retries,
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
+            "reasoning_effort": self.reasoning_effort,
         }
         self.update_model_params(shared_config)
 
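In effect, reasoning_effort can now be set once on the llm block and every model entry picks it up through shared_config, while an individual model entry can still override it. A minimal sketch of the intended usage, mirroring the new test suite further down (the model name and effort values are just the examples the tests use):

from openevolve.config import Config

# reasoning_effort set once at the llm level; the model entry inherits it
config = Config.from_dict({
    "log_level": "INFO",
    "llm": {
        "api_base": "https://api.openai.com/v1",
        "api_key": "test-key",
        "reasoning_effort": "high",
        "models": [{"name": "gpt-oss-120b", "weight": 1.0}],
    },
})

assert config.llm.reasoning_effort == "high"
assert config.llm.models[0].reasoning_effort == "high"  # inherited via shared_config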
openevolve/llm/openai.py
Lines changed: 7 additions & 4 deletions

@@ -33,6 +33,7 @@ def __init__(
         self.api_base = model_cfg.api_base
         self.api_key = model_cfg.api_key
         self.random_seed = getattr(model_cfg, "random_seed", None)
+        self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None)
 
         # Set up API client
         # OpenAI client requires max_retries to be int, not None
@@ -101,8 +102,9 @@ async def generate_with_context(
                 "max_completion_tokens": kwargs.get("max_tokens", self.max_tokens),
             }
             # Add optional reasoning parameters if provided
-            if "reasoning_effort" in kwargs:
-                params["reasoning_effort"] = kwargs["reasoning_effort"]
+            reasoning_effort = kwargs.get("reasoning_effort", self.reasoning_effort)
+            if reasoning_effort is not None:
+                params["reasoning_effort"] = reasoning_effort
             if "verbosity" in kwargs:
                 params["verbosity"] = kwargs["verbosity"]
         else:
@@ -116,8 +118,9 @@
             }
 
             # Handle reasoning_effort for open source reasoning models.
-            if "reasoning_effort" in kwargs:
-                params["reasoning_effort"] = kwargs["reasoning_effort"]
+            reasoning_effort = kwargs.get("reasoning_effort", self.reasoning_effort)
+            if reasoning_effort is not None:
+                params["reasoning_effort"] = reasoning_effort
 
             # Add seed parameter for reproducibility if configured
             # Skip seed parameter for Google AI Studio endpoint as it doesn't support it
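
The fix in both branches is the same: the old code forwarded reasoning_effort only when a caller passed it in kwargs, silently ignoring the configured default; the new code falls back to self.reasoning_effort and still lets an explicit kwarg win. A standalone sketch of the lookup order (the helper name is hypothetical, for illustration only):

from typing import Optional

def resolve_reasoning_effort(kwargs: dict, configured: Optional[str]) -> Optional[str]:
    # Per-call kwargs take precedence; otherwise use the configured default.
    # None means the parameter is omitted from the request params entirely.
    return kwargs.get("reasoning_effort", configured)

assert resolve_reasoning_effort({"reasoning_effort": "low"}, "high") == "low"  # kwarg wins
assert resolve_reasoning_effort({}, "high") == "high"  # config default now applies
assert resolve_reasoning_effort({}, None) is None  # nothing configured: omit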
Lines changed: 211 additions & 0 deletions

@@ -0,0 +1,211 @@
+"""
+Tests for reasoning_effort configuration parameter
+"""
+
+import unittest
+import yaml
+import asyncio
+from unittest.mock import Mock
+import tempfile
+import os
+
+from openevolve.config import Config, LLMConfig, LLMModelConfig
+from openevolve.llm.openai import OpenAILLM
+
+
+class TestReasoningEffortConfig(unittest.TestCase):
+    """Tests for reasoning_effort parameter handling in configuration"""
+
+    def test_reasoning_effort_in_llm_config(self):
+        """Test that reasoning_effort can be loaded from YAML config at LLM level"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "temperature": 0.7,
+                "max_tokens": 100000,
+                "timeout": 5000,
+                "retries": 1000000,
+                "reasoning_effort": "high",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0
+                    }
+                ]
+            }
+        }
+
+        # This should not raise a TypeError
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "high")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+
+    def test_reasoning_effort_in_model_config(self):
+        """Test that reasoning_effort can be specified per model"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0,
+                        "reasoning_effort": "medium"
+                    },
+                    {
+                        "name": "gpt-4",
+                        "weight": 0.5,
+                        "reasoning_effort": "high"
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.models[0].reasoning_effort, "medium")
+        self.assertEqual(config.llm.models[1].reasoning_effort, "high")
+
+    def test_reasoning_effort_inheritance(self):
+        """Test that model configs inherit reasoning_effort from parent LLM config"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "reasoning_effort": "low",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0
+                        # No reasoning_effort specified - should inherit
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "low")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "low")
+
+    def test_reasoning_effort_model_override(self):
+        """Test that model-level reasoning_effort overrides LLM-level"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "reasoning_effort": "low",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0,
+                        "reasoning_effort": "high"  # Override parent
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "low")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+
+    def test_openai_llm_uses_reasoning_effort(self):
+        """Test that OpenAILLM stores and uses reasoning_effort from config"""
+        # Create a mock model config with reasoning_effort
+        model_cfg = Mock()
+        model_cfg.name = "gpt-oss-120b"
+        model_cfg.system_message = "system"
+        model_cfg.temperature = 0.7
+        model_cfg.top_p = 0.95
+        model_cfg.max_tokens = 4096
+        model_cfg.timeout = 60
+        model_cfg.retries = 3
+        model_cfg.retry_delay = 5
+        model_cfg.api_base = "https://api.openai.com/v1"
+        model_cfg.api_key = "test-key"
+        model_cfg.random_seed = None
+        model_cfg.reasoning_effort = "high"
+
+        # Mock OpenAI client to avoid actual API calls
+        with unittest.mock.patch('openai.OpenAI'):
+            llm = OpenAILLM(model_cfg)
+
+            # Verify the reasoning_effort is stored
+            self.assertEqual(llm.reasoning_effort, "high")
+
+    def test_reasoning_effort_passed_to_api_params(self):
+        """Test that reasoning_effort is included in API call parameters"""
+        model_cfg = Mock()
+        model_cfg.name = "gpt-oss-120b"
+        model_cfg.system_message = "system"
+        model_cfg.temperature = 0.7
+        model_cfg.top_p = 0.95
+        model_cfg.max_tokens = 4096
+        model_cfg.timeout = 60
+        model_cfg.retries = 3
+        model_cfg.retry_delay = 5
+        model_cfg.api_base = "https://api.openai.com/v1"
+        model_cfg.api_key = "test-key"
+        model_cfg.random_seed = None
+        model_cfg.reasoning_effort = "medium"
+
+        with unittest.mock.patch('openai.OpenAI'):
+            llm = OpenAILLM(model_cfg)
+
+            # Test the _call_api method directly with mocked client
+            mock_response = Mock()
+            mock_response.choices = [Mock()]
+            mock_response.choices[0].message.content = "Test response"
+            llm.client.chat.completions.create.return_value = mock_response
+
+            # Test OpenAI reasoning model (gpt-oss-120b at openai.com should use reasoning logic)
+            test_params = {
+                "model": "gpt-oss-120b",
+                "messages": [{"role": "system", "content": "Test"}, {"role": "user", "content": "Test"}],
+                "max_completion_tokens": 4096,
+                "reasoning_effort": "medium"
+            }
+
+            result = asyncio.run(llm._call_api(test_params))
+
+            # Verify the API was called with reasoning_effort
+            llm.client.chat.completions.create.assert_called_once_with(**test_params)
+
+    def test_yaml_file_loading_with_reasoning_effort(self):
+        """Test loading reasoning_effort from actual YAML file"""
+        yaml_content = """
+log_level: INFO
+llm:
+  api_base: https://api.openai.com/v1
+  api_key: test-key
+  temperature: 0.7
+  max_tokens: 100000
+  timeout: 5000
+  retries: 1000000
+  reasoning_effort: high
+  models:
+    - name: gpt-oss-120b
+      weight: 1.0
+"""
+
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(yaml_content)
+            f.flush()
+
+            try:
+                config = Config.from_yaml(f.name)
+                self.assertEqual(config.llm.reasoning_effort, "high")
+                self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+            finally:
+                os.unlink(f.name)
+
+
+if __name__ == "__main__":
+    unittest.main()
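
Assuming the new test module sits on the project's test discovery path, the suite runs under the standard unittest runner:

python -m unittest discover -s tests -p "test_*.py" -v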
