23 changes: 16 additions & 7 deletions configs/default_config.yaml
@@ -16,13 +16,21 @@ max_code_length: 10000 # Maximum allowed code length in characters

# LLM configuration
llm:
# Primary model (used most frequently)
primary_model: "gemini-2.0-flash-lite"
primary_model_weight: 0.8 # Sampling weight for primary model

# Secondary model (used for occasional high-quality generations)
secondary_model: "gemini-2.0-flash"
secondary_model_weight: 0.2 # Sampling weight for secondary model
# Models for evolution
models:
# List of available models with their weights
- name: "gemini-2.0-flash-lite"
weight: 0.8
- name: "gemini-2.0-flash"
weight: 0.2

# Models for LLM feedback
evaluator_models:
# List of available models with their weights
- name: "gemini-2.0-flash-lite"
weight: 0.8
- name: "gemini-2.0-flash"
weight: 0.2

# API configuration
api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" # Base URL for API (change for non-OpenAI models)
@@ -42,6 +50,7 @@ llm:
prompt:
template_dir: null # Custom directory for prompt templates
system_message: "You are an expert coder helping to improve programs through evolution."
evaluator_system_message: "You are an expert code reviewer."

# Number of examples to include in the prompt
num_top_programs: 3 # Number of top-performing programs to include
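The weight field replaces the old primary_model_weight / secondary_model_weight pair; per the original comments it is a sampling weight. Below is a minimal illustration of that semantics. It is an assumption about how the ensemble uses the weights, not the shipped LLMEnsemble implementation, and the model names simply reuse the defaults above.

```python
# Illustrative only: weighted sampling over the configured model list,
# matching the "Sampling weight" wording in the old config comments.
# LLMEnsemble's actual selection logic is not shown in this diff.
import random

models = [("gemini-2.0-flash-lite", 0.8), ("gemini-2.0-flash", 0.2)]
names, weights = zip(*models)
chosen = random.choices(names, weights=weights, k=1)[0]
print(f"Sampled model for this generation: {chosen}")
```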
128 changes: 105 additions & 23 deletions openevolve/config.py
@@ -11,22 +11,41 @@


@dataclass
class LLMConfig:
"""Configuration for LLM models"""
class LLMModelConfig:
"""Configuration for a single LLM model"""

# Primary model
primary_model: str = "gemini-2.0-flash-lite"
primary_model_weight: float = 0.8
# API configuration
api_base: str = None
api_key: Optional[str] = None
name: str = None

# Secondary model
secondary_model: str = "gemini-2.0-flash"
secondary_model_weight: float = 0.2
# Weight for model in ensemble
weight: float = 1.0

# Generation parameters
system_message: Optional[str] = None
temperature: float = None
top_p: float = None
max_tokens: int = None

# Request parameters
timeout: int = None
retries: int = None
retry_delay: int = None


@dataclass
class LLMConfig(LLMModelConfig):
"""Configuration for LLM models"""

# API configuration
api_base: str = "https://api.openai.com/v1"
api_key: Optional[str] = None
name: str = "gpt-4o"

# Generation parameters
system_message: Optional[str] = (
"You are an expert coder helping to improve programs through evolution."
)
temperature: float = 0.7
top_p: float = 0.95
max_tokens: int = 4096
@@ -36,13 +55,69 @@ class LLMConfig:
retries: int = 3
retry_delay: int = 5

# n-model configuration for evolution LLM ensemble
models: List[LLMModelConfig] = field(default_factory=lambda: [LLMModelConfig()])

# n-model configuration for evaluator LLM ensemble
evaluator_models: List[LLMModelConfig] = field(default_factory=lambda: [])

# Backwards compatibility with primary_model(_weight) options
primary_model: str = "gemini-2.0-flash-lite"
primary_model_weight: float = 0.8
secondary_model: str = "gemini-2.0-flash"
secondary_model_weight: float = 0.2

def __post_init__(self):
"""Post-initialization to set up model configurations"""
# Handle backward compatibility for primary_model(_weight) and secondary_model(_weight).
if (self.primary_model or self.primary_model_weight) and len(self.models) < 1:

Review comment: primary_model and these parameters have default values, so this branch is always hit; the new way of configuring models does not work.

# Ensure we have a primary model
self.models.append(LLMModelConfig())
if self.primary_model:
self.models[0].name = self.primary_model
if self.primary_model_weight:
self.models[0].weight = self.primary_model_weight

if (self.secondary_model or self.secondary_model_weight) and len(self.models) < 2:
# Ensure we have a second model
self.models.append(LLMModelConfig())
if self.secondary_model:
self.models[1].name = self.secondary_model
if self.secondary_model_weight:
self.models[1].weight = self.secondary_model_weight

# If no evaluator models are defined, use the same models as for evolution
if not self.evaluator_models or len(self.evaluator_models) < 1:
self.evaluator_models = self.models.copy()

# Update models with shared configuration values
shared_config = {
"api_base": self.api_base,
"api_key": self.api_key,
"temperature": self.temperature,
"top_p": self.top_p,
"max_tokens": self.max_tokens,
"timeout": self.timeout,
"retries": self.retries,
"retry_delay": self.retry_delay,
}
self.update_model_params(shared_config)

def update_model_params(self, args: Dict[str, Any], overwrite: bool = False) -> None:
"""Update model parameters for all models"""
for model in self.models + self.evaluator_models:
for key, value in args.items():
if overwrite or getattr(model, key, None) is None:
setattr(model, key, value)


@dataclass
class PromptConfig:
"""Configuration for prompt generation"""

template_dir: Optional[str] = None
system_message: str = "You are an expert coder helping to improve programs through evolution."
evaluator_system_message: str = """You are an expert code reviewer."""

# Number of examples to include in the prompt
num_top_programs: int = 3
@@ -155,7 +230,14 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":

# Update nested configs
if "llm" in config_dict:
config.llm = LLMConfig(**config_dict["llm"])
llm_dict = config_dict["llm"]
if "models" in llm_dict:
llm_dict["models"] = [LLMModelConfig(**m) for m in llm_dict["models"]]
if "evaluator_models" in llm_dict:
llm_dict["evaluator_models"] = [
LLMModelConfig(**m) for m in llm_dict["evaluator_models"]
]
config.llm = LLMConfig(**llm_dict)
if "prompt" in config_dict:
config.prompt = PromptConfig(**config_dict["prompt"])
if "database" in config_dict:
@@ -176,10 +258,8 @@ def to_dict(self) -> Dict[str, Any]:
"random_seed": self.random_seed,
# Component configurations
"llm": {
"primary_model": self.llm.primary_model,
"primary_model_weight": self.llm.primary_model_weight,
"secondary_model": self.llm.secondary_model,
"secondary_model_weight": self.llm.secondary_model_weight,
"models": self.llm.models,
"evaluator_models": self.llm.evaluator_models,
"api_base": self.llm.api_base,
"temperature": self.llm.temperature,
"top_p": self.llm.top_p,
@@ -191,6 +271,7 @@ def to_dict(self) -> Dict[str, Any]:
"prompt": {
"template_dir": self.prompt.template_dir,
"system_message": self.prompt.system_message,
"evaluator_system_message": self.prompt.evaluator_system_message,
"num_top_programs": self.prompt.num_top_programs,
"num_diverse_programs": self.prompt.num_diverse_programs,
"use_template_stochasticity": self.prompt.use_template_stochasticity,
@@ -245,16 +326,17 @@ def to_yaml(self, path: Union[str, Path]) -> None:
def load_config(config_path: Optional[Union[str, Path]] = None) -> Config:
"""Load configuration from a YAML file or use defaults"""
if config_path and os.path.exists(config_path):
return Config.from_yaml(config_path)
config = Config.from_yaml(config_path)
else:
config = Config()

# Use environment variables if available
api_key = os.environ.get("OPENAI_API_KEY")
api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")

# Use environment variables if available
api_key = os.environ.get("OPENAI_API_KEY")
api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")
config.llm.update_model_params({"api_key": api_key, "api_base": api_base})

config = Config()
if api_key:
config.llm.api_key = api_key
if api_base:
config.llm.api_base = api_base
# Make the system message available to the individual models, in case it is not provided from the prompt sampler
config.llm.update_model_params({"system_message": config.prompt.system_message})

return config
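To make the review comment above concrete, here is a minimal sketch of the backward-compatibility interaction, built only from the dataclasses added in this diff. The exact behaviour of the merged code may differ, and "my-local-model" is a made-up name.

```python
# Sketch of the interaction flagged in the review comment (assumption:
# behaviour as written in this diff, before any follow-up fix).
from openevolve.config import LLMConfig, LLMModelConfig

# A user adopting the new list-based configuration with a single model:
cfg = LLMConfig(models=[LLMModelConfig(name="my-local-model", weight=1.0)])

# secondary_model / secondary_model_weight still carry defaults, so the
# truthiness check in __post_init__ passes and a default second model
# ("gemini-2.0-flash") is appended next to the user's single entry.
print([m.name for m in cfg.models])  # expected: ['my-local-model', 'gemini-2.0-flash']
```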
14 changes: 12 additions & 2 deletions openevolve/controller.py
@@ -96,15 +96,25 @@ def __init__(
self.file_extension = f".{self.file_extension}"

# Initialize components
self.llm_ensemble = LLMEnsemble(self.config.llm)
self.llm_ensemble = LLMEnsemble(self.config.llm.models)
self.llm_evaluator_ensemble = LLMEnsemble(self.config.llm.evaluator_models)

self.prompt_sampler = PromptSampler(self.config.prompt)
self.evaluator_prompt_sampler = PromptSampler(self.config.prompt)
self.evaluator_prompt_sampler.set_templates("evaluator_system_message")

# Pass random seed to database if specified
if self.config.random_seed is not None:
self.config.database.random_seed = self.config.random_seed

self.database = ProgramDatabase(self.config.database)
self.evaluator = Evaluator(self.config.evaluator, evaluation_file, self.llm_ensemble)

self.evaluator = Evaluator(
self.config.evaluator,
evaluation_file,
self.llm_evaluator_ensemble,
self.evaluator_prompt_sampler,
)

logger.info(f"Initialized OpenEvolve with {initial_program_path} " f"and {evaluation_file}")

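In short, the controller now builds two ensembles and two prompt samplers instead of one of each. A condensed sketch of that wiring, using only the constructors and calls visible in this diff (default Config() values assumed):

```python
# Condensed view of the new controller wiring (sketch, not the full class).
from openevolve.config import Config
from openevolve.llm.ensemble import LLMEnsemble
from openevolve.prompt.sampler import PromptSampler

config = Config()  # normally produced by load_config(...)

# Separate ensembles for code evolution and for LLM-based feedback
evolution_ensemble = LLMEnsemble(config.llm.models)
evaluator_ensemble = LLMEnsemble(config.llm.evaluator_models)

# The evaluator gets its own prompt sampler, switched to the reviewer template
prompt_sampler = PromptSampler(config.prompt)
evaluator_prompt_sampler = PromptSampler(config.prompt)
evaluator_prompt_sampler.set_templates("evaluator_system_message")
```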
93 changes: 48 additions & 45 deletions openevolve/evaluator.py
@@ -14,10 +14,12 @@
import uuid
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import traceback

from openevolve.config import EvaluatorConfig
from openevolve.llm.ensemble import LLMEnsemble
from openevolve.utils.async_utils import TaskPool, run_in_executor
from openevolve.prompt.sampler import PromptSampler
from openevolve.utils.format_utils import format_metrics_safe

logger = logging.getLogger(__name__)
@@ -36,10 +38,12 @@ def __init__(
config: EvaluatorConfig,
evaluation_file: str,
llm_ensemble: Optional[LLMEnsemble] = None,
prompt_sampler: Optional[PromptSampler] = None,
):
self.config = config
self.evaluation_file = evaluation_file
self.llm_ensemble = llm_ensemble
self.prompt_sampler = prompt_sampler

# Create a task pool for parallel evaluation
self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations)
@@ -286,67 +290,66 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:

try:
# Create prompt for LLM
prompt = f"""
Evaluate the following code on a scale of 0.0 to 1.0 for the following metrics:
1. Readability: How easy is the code to read and understand?
2. Maintainability: How easy would the code be to maintain and modify?
3. Efficiency: How efficient is the code in terms of time and space complexity?

For each metric, provide a score between 0.0 and 1.0, where 1.0 is best.

Code to evaluate:
```python
{program_code}
```

Return your evaluation as a JSON object with the following format:
{{
"readability": [score],
"maintainability": [score],
"efficiency": [score],
"reasoning": "[brief explanation of scores]"
}}
"""
prompt = self.prompt_sampler.build_prompt(
current_program=program_code, template_key="evaluation"
)

# Get LLM response
response = await self.llm_ensemble.generate(prompt)
responses = await self.llm_ensemble.generate_all_with_context(
prompt["system"], [{"role": "user", "content": prompt["user"]}]
)

# Extract JSON from response
try:
# Try to find JSON block
json_pattern = r"```json\n(.*?)\n```"
import re

json_match = re.search(json_pattern, response, re.DOTALL)

if json_match:
json_str = json_match.group(1)
else:
# Try to extract JSON directly
json_str = response
# Remove non-JSON parts
start_idx = json_str.find("{")
end_idx = json_str.rfind("}") + 1
if start_idx >= 0 and end_idx > start_idx:
json_str = json_str[start_idx:end_idx]

# Parse JSON
result = json.loads(json_str)

# Extract metrics
metrics = {}
for key in ["readability", "maintainability", "efficiency"]:
if key in result:
metrics[key] = float(result[key])

return metrics
avg_metrics = {}
for i, response in enumerate(responses):
json_match = re.search(json_pattern, response, re.DOTALL)

if json_match:
json_str = json_match.group(1)
else:
# Try to extract JSON directly
json_str = response
# Remove non-JSON parts
start_idx = json_str.find("{")
end_idx = json_str.rfind("}") + 1
if start_idx >= 0 and end_idx > start_idx:
json_str = json_str[start_idx:end_idx]

# Parse JSON
result = json.loads(json_str)

# Filter all non-numeric values
metrics = {
name: float(value)
for name, value in result.items()
if isinstance(value, (int, float))
}

# Weight of the model in the ensemble
weight = self.llm_ensemble.weights[i] if self.llm_ensemble.weights else 1.0

# Average the metrics
for name, value in metrics.items():
if name in avg_metrics:
avg_metrics[name] += value * weight
else:
avg_metrics[name] = value * weight

return avg_metrics

except Exception as e:
logger.warning(f"Error parsing LLM response: {str(e)}")
traceback.print_exc()
return {}

except Exception as e:
logger.error(f"Error in LLM evaluation: {str(e)}")
traceback.print_exc()
return {}

def _passes_threshold(self, metrics: Dict[str, float], threshold: float) -> bool:
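The parsing loop above now weights each evaluator model's scores by its ensemble weight and sums them. A standalone sketch of just that aggregation step follows; it mirrors the loop in the diff, but the metric names, scores, and weights are invented for illustration.

```python
# Standalone sketch of the weighted metric aggregation in _llm_evaluate.
# Input scores and weights are invented for illustration.
from typing import Dict, List

def aggregate_metrics(
    per_model_metrics: List[Dict[str, float]], weights: List[float]
) -> Dict[str, float]:
    """Sum each numeric metric scaled by its model's ensemble weight."""
    avg_metrics: Dict[str, float] = {}
    for metrics, weight in zip(per_model_metrics, weights):
        for name, value in metrics.items():
            avg_metrics[name] = avg_metrics.get(name, 0.0) + value * weight
    return avg_metrics

# Two evaluator models with weights 0.8 and 0.2, as in the default config
print(aggregate_metrics(
    [{"readability": 0.9, "maintainability": 0.7},
     {"readability": 0.5, "maintainability": 0.6}],
    [0.8, 0.2],
))  # ≈ {'readability': 0.82, 'maintainability': 0.68}
```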