Merged

fix #227

2 changes: 1 addition & 1 deletion openevolve/_version.py
@@ -1,3 +1,3 @@
"""Version information for openevolve package."""

__version__ = "0.2.2"
__version__ = "0.2.3"
28 changes: 26 additions & 2 deletions openevolve/llm/openai.py
@@ -66,14 +66,38 @@ async def generate_with_context(
formatted_messages.extend(messages)

# Set up generation parameters
if self.api_base == "https://api.openai.com/v1" and str(self.model).lower().startswith("o"):
# For o-series models
# Define OpenAI reasoning models that require max_completion_tokens
# These models don't support temperature/top_p and use different parameters
OPENAI_REASONING_MODEL_PREFIXES = (
# O-series reasoning models
"o1-", "o1", # o1, o1-mini, o1-preview
"o3-", "o3", # o3, o3-mini, o3-pro
"o4-", # o4-mini
# GPT-5 series are also reasoning models
"gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
)

# Check if this is an OpenAI reasoning model
model_lower = str(self.model).lower()
is_openai_reasoning_model = (
self.api_base == "https://api.openai.com/v1" and
model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
)

if is_openai_reasoning_model:
# For OpenAI reasoning models
params = {
"model": self.model,
"messages": formatted_messages,
"max_completion_tokens": kwargs.get("max_tokens", self.max_tokens),
}
# Add optional reasoning parameters if provided
if "reasoning_effort" in kwargs:
params["reasoning_effort"] = kwargs["reasoning_effort"]
if "verbosity" in kwargs:
params["verbosity"] = kwargs["verbosity"]
else:
# Standard parameters for all other models
params = {
"model": self.model,
"messages": formatted_messages,
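For reference, a minimal standalone sketch of what the new branch assembles, not part of the diff: the model names, token limit, and message content below are illustrative, and the standard branch is abbreviated because the rest of the hunk is truncated above.

# Standalone sketch mirroring the selection logic added above (illustrative only).
OPENAI_REASONING_MODEL_PREFIXES = (
    "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5",
)

def build_params(model, messages, max_tokens, api_base="https://api.openai.com/v1", **kwargs):
    """Return the request parameters the new code would assemble.

    The standard (non-reasoning) branch is abbreviated here; in the actual
    file it continues past the truncated part of the hunk.
    """
    is_openai_reasoning_model = (
        api_base == "https://api.openai.com/v1"
        and str(model).lower().startswith(OPENAI_REASONING_MODEL_PREFIXES)
    )
    if is_openai_reasoning_model:
        params = {
            "model": model,
            "messages": messages,
            "max_completion_tokens": kwargs.get("max_tokens", max_tokens),
        }
        # Optional reasoning-specific parameters are passed through when provided
        if "reasoning_effort" in kwargs:
            params["reasoning_effort"] = kwargs["reasoning_effort"]
        if "verbosity" in kwargs:
            params["verbosity"] = kwargs["verbosity"]
    else:
        params = {
            "model": model,
            "messages": messages,
            "max_tokens": kwargs.get("max_tokens", max_tokens),
        }
    return params

messages = [{"role": "user", "content": "Hello"}]
print(build_params("o3-mini", messages, 4096, reasoning_effort="high"))
# {'model': 'o3-mini', 'messages': [...], 'max_completion_tokens': 4096, 'reasoning_effort': 'high'}
print(build_params("gpt-4o-mini", messages, 4096))
# {'model': 'gpt-4o-mini', 'messages': [...], 'max_tokens': 4096}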
70 changes: 70 additions & 0 deletions tests/test_model_parameter_demo.py
@@ -0,0 +1,70 @@
"""
Demonstration of fixed OpenAI model parameter handling
"""

def demo_model_parameter_selection():
"""Demonstrate how different models get different parameters"""

# Mock the logic from openai.py
OPENAI_REASONING_MODEL_PREFIXES = (
# O-series reasoning models
"o1-", "o1", # o1, o1-mini, o1-preview
"o3-", "o3", # o3, o3-mini, o3-pro
"o4-", # o4-mini
# GPT-5 series are also reasoning models
"gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
)

def get_params_for_model(model_name, api_base="https://api.openai.com/v1"):
"""Show what parameters would be used for each model"""
model_lower = str(model_name).lower()
is_openai_reasoning_model = (
api_base == "https://api.openai.com/v1" and
model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
)

if is_openai_reasoning_model:
return {
"type": "reasoning_model",
"uses": "max_completion_tokens",
"supports": ["reasoning_effort", "verbosity"],
"excludes": ["temperature", "top_p"]
}
else:
return {
"type": "standard_model",
"uses": "max_tokens",
"supports": ["temperature", "top_p"],
"excludes": []
}

print("🔧 OpenAI Model Parameter Selection Demo")
print("=" * 50)

test_models = [
# Reasoning models
("o1-mini", "✅ Reasoning"),
("o1-preview", "✅ Reasoning"),
("o3-mini-2025-01-31", "✅ Reasoning (with date)"),
("gpt-5-nano", "✅ Reasoning (GPT-5 series)"),

# Standard models
("gpt-4o-mini", "❌ Standard (not reasoning)"),
("gpt-4o", "❌ Standard"),
("gpt-4-turbo", "❌ Standard"),
]

for model, description in test_models:
params = get_params_for_model(model)
print(f"\n📋 Model: {model}")
print(f" Type: {description}")
print(f" Uses: {params['uses']}")
print(f" Supports: {', '.join(params['supports'])}")
if params['excludes']:
print(f" Excludes: {', '.join(params['excludes'])}")

print("\n" + "=" * 50)
print("✅ Fix successful! No more false positives/negatives.")

if __name__ == "__main__":
demo_model_parameter_selection()
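Note (not part of the diff): demo_model_parameter_selection is not prefixed with test_ and contains no assertions, so pytest's default collection rules will skip it; given the __main__ guard above, it is meant to be run directly, e.g. python tests/test_model_parameter_demo.py from the repository root.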
101 changes: 101 additions & 0 deletions tests/test_openai_model_detection.py
@@ -0,0 +1,101 @@
"""
Test OpenAI reasoning model detection logic
"""

import unittest
from unittest.mock import MagicMock


class TestOpenAIReasoningModelDetection(unittest.TestCase):
"""Test that OpenAI reasoning models are correctly identified"""

def test_reasoning_model_detection(self):
"""Test various model names to ensure correct reasoning model detection"""

# Define the same constants as in the code
OPENAI_REASONING_MODEL_PREFIXES = (
# O-series reasoning models
"o1-", "o1", # o1, o1-mini, o1-preview
"o3-", "o3", # o3, o3-mini, o3-pro
"o4-", # o4-mini
# GPT-5 series are also reasoning models
"gpt-5-", "gpt-5" # gpt-5, gpt-5-mini, gpt-5-nano
)

def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"):
"""Test function that mimics the logic in openai.py"""
model_lower = str(model_name).lower()
return (
api_base == "https://api.openai.com/v1" and
model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
)

# Test cases: (model_name, expected_result, description)
test_cases = [
# Reasoning models - should return True
("o1", True, "Base o1 model"),
("o1-mini", True, "o1-mini model"),
("o1-preview", True, "o1-preview model"),
("o1-mini-2025-01-31", True, "o1-mini with date"),
("o3", True, "Base o3 model"),
("o3-mini", True, "o3-mini model"),
("o3-pro", True, "o3-pro model"),
("o4-mini", True, "o4-mini model"),
("gpt-5", True, "Base gpt-5 model"),
("gpt-5-mini", True, "gpt-5-mini model"),
("gpt-5-nano", True, "gpt-5-nano model"),

# Non-reasoning models - should return False
("gpt-4o-mini", False, "gpt-4o-mini (not reasoning)"),
("gpt-4o", False, "gpt-4o (not reasoning)"),
("gpt-4", False, "gpt-4 (not reasoning)"),
("gpt-3.5-turbo", False, "gpt-3.5-turbo (not reasoning)"),
("claude-3", False, "Non-OpenAI model"),
("gemini-pro", False, "Non-OpenAI model"),

# Edge cases
("O1-MINI", True, "Uppercase o1-mini"),
("GPT-5-MINI", True, "Uppercase gpt-5-mini"),
]

for model_name, expected, description in test_cases:
with self.subTest(model=model_name, desc=description):
result = is_reasoning_model(model_name)
self.assertEqual(
result,
expected,
f"Model '{model_name}' ({description}): expected {expected}, got {result}"
)

def test_non_openai_api_base(self):
"""Test that non-OpenAI API bases don't trigger reasoning model logic"""
OPENAI_REASONING_MODEL_PREFIXES = (
"o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5"
)

def is_reasoning_model(model_name, api_base):
model_lower = str(model_name).lower()
return (
api_base == "https://api.openai.com/v1" and
model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
)

# Even reasoning model names should return False for non-OpenAI APIs
test_cases = [
("o1-mini", "https://api.anthropic.com/v1", False),
("gpt-5", "https://generativelanguage.googleapis.com/v1beta/openai/", False),
("o3-mini", "https://api.deepseek.com/v1", False),
]

for model_name, api_base, expected in test_cases:
with self.subTest(model=model_name, api=api_base):
result = is_reasoning_model(model_name, api_base)
self.assertEqual(
result,
expected,
f"Model '{model_name}' with API '{api_base}' should return {expected}"
)


if __name__ == "__main__":
unittest.main()
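Note (not part of the diff): since the module ends with unittest.main(), the suite can be run directly with python tests/test_openai_model_detection.py, or via a test runner such as python -m pytest tests/test_openai_model_detection.py if pytest is available.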