Commit 7630298

Merge pull request #260 from codelion/fix-reasoning-effort

Fix reasoning effort

Previously, reasoning_effort reached the API only when a caller passed it explicitly per request. This merge makes it a configuration option at both the LLM and per-model level, has the OpenAI client fall back to the configured value, adds a test suite for the new behavior, and bumps the version to 0.2.13.

2 parents: 139cbc7 + 2495dd3

File tree: 4 files changed (+227, -5 lines)

openevolve/_version.py
Lines changed: 1 addition & 1 deletion

@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.12"
+__version__ = "0.2.13"

openevolve/config.py
Lines changed: 8 additions & 0 deletions

@@ -38,6 +38,9 @@ class LLMModelConfig:
 
     # Reproducibility
     random_seed: Optional[int] = None
+
+    # Reasoning parameters
+    reasoning_effort: Optional[str] = None
 
 
 @dataclass
@@ -69,6 +72,9 @@ class LLMConfig(LLMModelConfig):
     primary_model_weight: float = None
     secondary_model: str = None
     secondary_model_weight: float = None
+
+    # Reasoning parameters (inherited from LLMModelConfig but can be overridden)
+    reasoning_effort: Optional[str] = None
 
     def __post_init__(self):
         """Post-initialization to set up model configurations"""
@@ -121,6 +127,7 @@ def __post_init__(self):
             "retries": self.retries,
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
+            "reasoning_effort": self.reasoning_effort,
         }
         self.update_model_params(shared_config)
 
@@ -173,6 +180,7 @@ def rebuild_models(self) -> None:
             "retries": self.retries,
             "retry_delay": self.retry_delay,
             "random_seed": self.random_seed,
+            "reasoning_effort": self.reasoning_effort,
         }
         self.update_model_params(shared_config)
 
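In effect, reasoning_effort can now be set once on the llm block and every model entry picks it up through shared_config, while an individual model entry can still override it. A minimal sketch of the intended usage, mirroring the new test suite further down (the model name and effort values are just the examples the tests use):

from openevolve.config import Config

# reasoning_effort set once at the llm level; the model entry inherits it
config = Config.from_dict({
    "log_level": "INFO",
    "llm": {
        "api_base": "https://api.openai.com/v1",
        "api_key": "test-key",
        "reasoning_effort": "high",
        "models": [{"name": "gpt-oss-120b", "weight": 1.0}],
    },
})

assert config.llm.reasoning_effort == "high"
assert config.llm.models[0].reasoning_effort == "high"  # inherited via shared_config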
openevolve/llm/openai.py
Lines changed: 7 additions & 4 deletions

@@ -33,6 +33,7 @@ def __init__(
         self.api_base = model_cfg.api_base
         self.api_key = model_cfg.api_key
         self.random_seed = getattr(model_cfg, "random_seed", None)
+        self.reasoning_effort = getattr(model_cfg, "reasoning_effort", None)
 
         # Set up API client
         # OpenAI client requires max_retries to be int, not None
@@ -101,8 +102,9 @@ async def generate_with_context(
                 "max_completion_tokens": kwargs.get("max_tokens", self.max_tokens),
             }
             # Add optional reasoning parameters if provided
-            if "reasoning_effort" in kwargs:
-                params["reasoning_effort"] = kwargs["reasoning_effort"]
+            reasoning_effort = kwargs.get("reasoning_effort", self.reasoning_effort)
+            if reasoning_effort is not None:
+                params["reasoning_effort"] = reasoning_effort
             if "verbosity" in kwargs:
                 params["verbosity"] = kwargs["verbosity"]
         else:
@@ -116,8 +118,9 @@
             }
 
             # Handle reasoning_effort for open source reasoning models.
-            if "reasoning_effort" in kwargs:
-                params["reasoning_effort"] = kwargs["reasoning_effort"]
+            reasoning_effort = kwargs.get("reasoning_effort", self.reasoning_effort)
+            if reasoning_effort is not None:
+                params["reasoning_effort"] = reasoning_effort
 
             # Add seed parameter for reproducibility if configured
             # Skip seed parameter for Google AI Studio endpoint as it doesn't support it
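
The fix in both branches is the same: the old code forwarded reasoning_effort only when a caller passed it in kwargs, silently ignoring the configured default; the new code falls back to self.reasoning_effort and still lets an explicit kwarg win. A standalone sketch of the lookup order (the helper name is hypothetical, for illustration only):

from typing import Optional

def resolve_reasoning_effort(kwargs: dict, configured: Optional[str]) -> Optional[str]:
    # Per-call kwargs take precedence; otherwise use the configured default.
    # None means the parameter is omitted from the request params entirely.
    return kwargs.get("reasoning_effort", configured)

assert resolve_reasoning_effort({"reasoning_effort": "low"}, "high") == "low"  # kwarg wins
assert resolve_reasoning_effort({}, "high") == "high"  # config default now applies
assert resolve_reasoning_effort({}, None) is None  # nothing configured: omit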
Lines changed: 211 additions & 0 deletions

@@ -0,0 +1,211 @@
+"""
+Tests for reasoning_effort configuration parameter
+"""
+
+import unittest
+import yaml
+import asyncio
+from unittest.mock import Mock
+import tempfile
+import os
+
+from openevolve.config import Config, LLMConfig, LLMModelConfig
+from openevolve.llm.openai import OpenAILLM
+
+
+class TestReasoningEffortConfig(unittest.TestCase):
+    """Tests for reasoning_effort parameter handling in configuration"""
+
+    def test_reasoning_effort_in_llm_config(self):
+        """Test that reasoning_effort can be loaded from YAML config at LLM level"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "temperature": 0.7,
+                "max_tokens": 100000,
+                "timeout": 5000,
+                "retries": 1000000,
+                "reasoning_effort": "high",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0
+                    }
+                ]
+            }
+        }
+
+        # This should not raise a TypeError
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "high")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+
+    def test_reasoning_effort_in_model_config(self):
+        """Test that reasoning_effort can be specified per model"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0,
+                        "reasoning_effort": "medium"
+                    },
+                    {
+                        "name": "gpt-4",
+                        "weight": 0.5,
+                        "reasoning_effort": "high"
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.models[0].reasoning_effort, "medium")
+        self.assertEqual(config.llm.models[1].reasoning_effort, "high")
+
+    def test_reasoning_effort_inheritance(self):
+        """Test that model configs inherit reasoning_effort from parent LLM config"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "reasoning_effort": "low",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0
+                        # No reasoning_effort specified - should inherit
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "low")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "low")
+
+    def test_reasoning_effort_model_override(self):
+        """Test that model-level reasoning_effort overrides LLM-level"""
+        yaml_config = {
+            "log_level": "INFO",
+            "llm": {
+                "api_base": "https://api.openai.com/v1",
+                "api_key": "test-key",
+                "reasoning_effort": "low",
+                "models": [
+                    {
+                        "name": "gpt-oss-120b",
+                        "weight": 1.0,
+                        "reasoning_effort": "high"  # Override parent
+                    }
+                ]
+            }
+        }
+
+        config = Config.from_dict(yaml_config)
+
+        self.assertEqual(config.llm.reasoning_effort, "low")
+        self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+
+    def test_openai_llm_uses_reasoning_effort(self):
+        """Test that OpenAILLM stores and uses reasoning_effort from config"""
+        # Create a mock model config with reasoning_effort
+        model_cfg = Mock()
+        model_cfg.name = "gpt-oss-120b"
+        model_cfg.system_message = "system"
+        model_cfg.temperature = 0.7
+        model_cfg.top_p = 0.95
+        model_cfg.max_tokens = 4096
+        model_cfg.timeout = 60
+        model_cfg.retries = 3
+        model_cfg.retry_delay = 5
+        model_cfg.api_base = "https://api.openai.com/v1"
+        model_cfg.api_key = "test-key"
+        model_cfg.random_seed = None
+        model_cfg.reasoning_effort = "high"
+
+        # Mock OpenAI client to avoid actual API calls
+        with unittest.mock.patch('openai.OpenAI'):
+            llm = OpenAILLM(model_cfg)
+
+            # Verify the reasoning_effort is stored
+            self.assertEqual(llm.reasoning_effort, "high")
+
+    def test_reasoning_effort_passed_to_api_params(self):
+        """Test that reasoning_effort is included in API call parameters"""
+        model_cfg = Mock()
+        model_cfg.name = "gpt-oss-120b"
+        model_cfg.system_message = "system"
+        model_cfg.temperature = 0.7
+        model_cfg.top_p = 0.95
+        model_cfg.max_tokens = 4096
+        model_cfg.timeout = 60
+        model_cfg.retries = 3
+        model_cfg.retry_delay = 5
+        model_cfg.api_base = "https://api.openai.com/v1"
+        model_cfg.api_key = "test-key"
+        model_cfg.random_seed = None
+        model_cfg.reasoning_effort = "medium"
+
+        with unittest.mock.patch('openai.OpenAI'):
+            llm = OpenAILLM(model_cfg)
+
+            # Test the _call_api method directly with mocked client
+            mock_response = Mock()
+            mock_response.choices = [Mock()]
+            mock_response.choices[0].message.content = "Test response"
+            llm.client.chat.completions.create.return_value = mock_response
+
+            # Test OpenAI reasoning model (gpt-oss-120b at openai.com should use reasoning logic)
+            test_params = {
+                "model": "gpt-oss-120b",
+                "messages": [{"role": "system", "content": "Test"}, {"role": "user", "content": "Test"}],
+                "max_completion_tokens": 4096,
+                "reasoning_effort": "medium"
+            }
+
+            result = asyncio.run(llm._call_api(test_params))
+
+            # Verify the API was called with reasoning_effort
+            llm.client.chat.completions.create.assert_called_once_with(**test_params)
+
+    def test_yaml_file_loading_with_reasoning_effort(self):
+        """Test loading reasoning_effort from actual YAML file"""
+        yaml_content = """
+log_level: INFO
+llm:
+  api_base: https://api.openai.com/v1
+  api_key: test-key
+  temperature: 0.7
+  max_tokens: 100000
+  timeout: 5000
+  retries: 1000000
+  reasoning_effort: high
+  models:
+    - name: gpt-oss-120b
+      weight: 1.0
+"""
+
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
+            f.write(yaml_content)
+            f.flush()
+
+            try:
+                config = Config.from_yaml(f.name)
+                self.assertEqual(config.llm.reasoning_effort, "high")
+                self.assertEqual(config.llm.models[0].reasoning_effort, "high")
+            finally:
+                os.unlink(f.name)
+
+
+if __name__ == "__main__":
+    unittest.main()
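
Assuming the new test module sits on the project's test discovery path, the suite runs under the standard unittest runner:

python -m unittest discover -s tests -p "test_*.py" -v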
