Merged

f #249

2 changes: 1 addition & 1 deletion openevolve/_version.py
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""

-__version__ = "0.2.9"
+__version__ = "0.2.10"
29 changes: 18 additions & 11 deletions openevolve/config.py
@@ -73,8 +73,7 @@ def __post_init__(self):
         if self.primary_model:
             # Create primary model
             primary_model = LLMModelConfig(
-                name=self.primary_model,
-                weight=self.primary_model_weight or 1.0
+                name=self.primary_model, weight=self.primary_model_weight or 1.0
             )
             self.models.append(primary_model)

@@ -83,14 +82,22 @@ def __post_init__(self):
         if self.secondary_model_weight is None or self.secondary_model_weight > 0:
             secondary_model = LLMModelConfig(
                 name=self.secondary_model,
-                weight=self.secondary_model_weight if self.secondary_model_weight is not None else 0.2
+                weight=(
+                    self.secondary_model_weight
+                    if self.secondary_model_weight is not None
+                    else 0.2
+                ),
             )
             self.models.append(secondary_model)

         # Only validate if this looks like a user config (has some model info)
         # Don't validate during internal/default initialization
-        if (self.primary_model or self.secondary_model or
-            self.primary_model_weight or self.secondary_model_weight) and not self.models:
+        if (
+            self.primary_model
+            or self.secondary_model
+            or self.primary_model_weight
+            or self.secondary_model_weight
+        ) and not self.models:
             raise ValueError(
                 "No LLM models configured. Please specify 'models' array or "
                 "'primary_model' in your configuration."
@@ -198,11 +205,11 @@ class DatabaseConfig:
         default_factory=lambda: ["complexity", "diversity"],
         metadata={
             "help": "List of feature dimensions for MAP-Elites grid. "
-            "Built-in dimensions: 'complexity', 'diversity', 'score'. "
-            "Custom dimensions: Must match metric names from evaluator. "
-            "IMPORTANT: Evaluators must return raw continuous values for custom dimensions, "
-            "NOT pre-computed bin indices. OpenEvolve handles all scaling and binning internally."
-        }
+            "Built-in dimensions: 'complexity', 'diversity', 'score'. "
+            "Custom dimensions: Must match metric names from evaluator. "
+            "IMPORTANT: Evaluators must return raw continuous values for custom dimensions, "
+            "NOT pre-computed bin indices. OpenEvolve handles all scaling and binning internally."
+        },
     )
     feature_bins: Union[int, Dict[str, int]] = 10  # Can be int (all dims) or dict (per-dim)
     diversity_reference_size: int = 20  # Size of reference set for diversity calculation
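The help text above is the operative contract. As a sketch (the metric names and the benchmark stub are hypothetical), an evaluator feeding a custom "memory_usage" dimension returns raw measurements and leaves binning to OpenEvolve:

def run_benchmark(path: str) -> tuple:  # stand-in for a real benchmark harness
    return 120.0, 142.7  # (runtime_ms, memory_mb)

def evaluate(program_path: str) -> dict:
    runtime_ms, memory_mb = run_benchmark(program_path)
    return {
        "combined_score": 1.0 / (1.0 + runtime_ms),  # fitness signal
        "memory_usage": memory_mb,  # raw continuous value; OpenEvolve bins it
    }

With feature_dimensions set to ["complexity", "memory_usage"], the database min-max scales the raw memory_usage values across observed programs and maps them onto the feature_bins grid.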
@@ -271,7 +278,7 @@ class Config:
     # Evolution settings
     diff_based_evolution: bool = True
     max_code_length: int = 10000

     # Early stopping settings
     early_stopping_patience: Optional[int] = None
     convergence_threshold: float = 0.001
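For reference, the weight defaults in __post_init__ above resolve as in this sketch (assuming the enclosing dataclass is LLMConfig and is importable as shown):

from openevolve.config import LLMConfig  # assumed import path and class name

# Secondary weight omitted: defaults to 0.2 per __post_init__ above.
cfg = LLMConfig(primary_model="gpt-4o", secondary_model="gpt-4o-mini")
assert [m.weight for m in cfg.models] == [1.0, 0.2]

# Explicit zero weight: the secondary model is skipped entirely.
cfg = LLMConfig(
    primary_model="gpt-4o", secondary_model="gpt-4o-mini", secondary_model_weight=0
)
assert len(cfg.models) == 1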
23 changes: 18 additions & 5 deletions openevolve/controller.py
@@ -353,10 +353,20 @@ def force_exit_handler(signum, frame):
             best_program = best_by_combined

         if best_program:
-            logger.info(
-                f"Evolution complete. Best program has metrics: "
-                f"{format_metrics_safe(best_program.metrics)}"
-            )
+            if (
+                hasattr(self, "parallel_controller")
+                and self.parallel_controller
+                and self.parallel_controller.early_stopping_triggered
+            ):
+                logger.info(
+                    f"🛑 Evolution complete via early stopping. Best program has metrics: "
+                    f"{format_metrics_safe(best_program.metrics)}"
+                )
+            else:
+                logger.info(
+                    f"Evolution complete. Best program has metrics: "
+                    f"{format_metrics_safe(best_program.metrics)}"
+                )
             self._save_best_program(best_program)
             return best_program
         else:
@@ -467,10 +477,13 @@ async def _run_evolution_with_checkpoints(
             start_iteration, max_iterations, target_score, checkpoint_callback=self._save_checkpoint
         )

-        # Check if shutdown was requested
+        # Check if shutdown or early stopping was triggered
        if self.parallel_controller.shutdown_event.is_set():
             logger.info("Evolution stopped due to shutdown request")
             return
+        elif self.parallel_controller.early_stopping_triggered:
+            logger.info("Evolution stopped due to early stopping - saving final checkpoint")
+            # Continue to save final checkpoint for early stopping

         # Save final checkpoint if needed
         # Note: start_iteration here is the evolution start (1 for fresh start, not 0)
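The early_stopping_triggered flag consumed here is set by the parallel controller from the early_stopping_patience and convergence_threshold fields added to Config above; a minimal sketch of the implied patience logic (function and variable names assumed):

def should_stop(scores, patience, threshold) -> bool:
    # scores: best fitness per iteration; patience/threshold mirror the
    # early_stopping_patience and convergence_threshold Config fields.
    best, stale = float("-inf"), 0
    for score in scores:
        if score > best + threshold:
            best, stale = score, 0  # real improvement resets the counter
        else:
            stale += 1
        if patience is not None and stale >= patience:
            return True
    return False

assert should_stop([0.5, 0.50005, 0.5001, 0.50012], patience=3, threshold=0.001)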
35 changes: 21 additions & 14 deletions openevolve/database.py
@@ -248,7 +249,9 @@ def add(
             if existing_program_id in self.programs:
                 existing_program = self.programs[existing_program_id]
                 new_fitness = get_fitness_score(program.metrics, self.config.feature_dimensions)
-                existing_fitness = get_fitness_score(existing_program.metrics, self.config.feature_dimensions)
+                existing_fitness = get_fitness_score(
+                    existing_program.metrics, self.config.feature_dimensions
+                )
                 logger.info(
                     "MAP-Elites cell improved: %s (fitness: %.3f -> %.3f)",
                     coords_dict,
@@ -290,7 +292,7 @@ def add(
         else:
             # No parent and no target specified, use current island
             island_idx = self.current_island

         island_idx = island_idx % len(self.islands)  # Ensure valid island
         self.islands[island_idx].add(program.id)
@@ -547,7 +549,7 @@ def load(self, path: str) -> None:
         self.current_island = metadata.get("current_island", 0)
         self.island_generations = metadata.get("island_generations", [0] * len(saved_islands))
         self.last_migration_generation = metadata.get("last_migration_generation", 0)

         # Load feature_stats for MAP-Elites grid stability
         self.feature_stats = self._deserialize_feature_stats(metadata.get("feature_stats", {}))

@@ -839,7 +841,7 @@ def _feature_coords_to_key(self, coords: List[int]) -> str:
     def _is_better(self, program1: Program, program2: Program) -> bool:
         """
         Determine if program1 has better FITNESS than program2

         Uses fitness calculation that excludes MAP-Elites feature dimensions
         to prevent pollution of fitness comparisons.
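A sketch of the fitness/feature separation this docstring describes — feature metrics say where a program sits in the grid, not how good it is (the actual get_fitness_score implementation may differ):

# Assumed behavior of get_fitness_score: average non-feature metrics,
# preferring an explicit combined_score when the evaluator provides one.
def get_fitness_score(metrics: dict, feature_dimensions: list) -> float:
    if "combined_score" in metrics:
        return metrics["combined_score"]
    values = [
        v
        for k, v in metrics.items()
        if k not in feature_dimensions and isinstance(v, (int, float))
    ]
    return sum(values) / len(values) if values else 0.0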

@@ -901,7 +903,8 @@ def _update_archive(self, program: Program) -> None:
         # Find worst program among valid programs
         if valid_archive_programs:
             worst_program = min(
-                valid_archive_programs, key=lambda p: get_fitness_score(p.metrics, self.config.feature_dimensions)
+                valid_archive_programs,
+                key=lambda p: get_fitness_score(p.metrics, self.config.feature_dimensions),
             )

             # Replace if new program is better
@@ -1848,7 +1851,7 @@ def _scale_feature_value_minmax(self, feature_name: str, value: float) -> float:
     def _serialize_feature_stats(self) -> Dict[str, Any]:
         """
         Serialize feature_stats for JSON storage

         Returns:
             Dictionary that can be JSON-serialized
         """
@@ -1866,26 +1869,28 @@ def _serialize_feature_stats(self) -> Dict[str, Any]:
                     serialized_stats[key] = value
                 else:
                     # Convert numpy types to Python native types
-                    if hasattr(value, 'item'):  # numpy scalar
+                    if hasattr(value, "item"):  # numpy scalar
                         serialized_stats[key] = value.item()
                     else:
                         serialized_stats[key] = value
             serialized[feature_name] = serialized_stats
         return serialized

-    def _deserialize_feature_stats(self, stats_dict: Dict[str, Any]) -> Dict[str, Dict[str, Union[float, List[float]]]]:
+    def _deserialize_feature_stats(
+        self, stats_dict: Dict[str, Any]
+    ) -> Dict[str, Dict[str, Union[float, List[float]]]]:
         """
         Deserialize feature_stats from loaded JSON

         Args:
             stats_dict: Dictionary loaded from JSON

         Returns:
             Properly formatted feature_stats dictionary
         """
         if not stats_dict:
             return {}

         deserialized = {}
         for feature_name, stats in stats_dict.items():
             if isinstance(stats, dict):
@@ -1897,8 +1902,10 @@ def _deserialize_feature_stats(self, stats_dict: Dict[str, Any]) -> Dict[str, Dict[str, Union[float, List[float]]]]:
                 }
                 deserialized[feature_name] = deserialized_stats
             else:
-                logger.warning(f"Skipping malformed feature_stats entry for '{feature_name}': {stats}")
+                logger.warning(
+                    f"Skipping malformed feature_stats entry for '{feature_name}': {stats}"
+                )

         return deserialized

def log_island_status(self) -> None:
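The numpy-to-native conversion in _serialize_feature_stats exists because the stdlib json module rejects most numpy scalar types; a standalone illustration:

import json

import numpy as np

# Feature stats computed with numpy may contain non-native scalar types.
stats = {"complexity": {"min": np.float32(1.5), "max": np.int64(9)}}

# json.dumps(stats) raises TypeError ("Object of type float32 is not JSON
# serializable"); .item() converts numpy scalars to native Python types.
clean = {
    name: {k: (v.item() if hasattr(v, "item") else v) for k, v in s.items()}
    for name, s in stats.items()
}
print(json.dumps(clean))  # {"complexity": {"min": 1.5, "max": 9}}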
4 changes: 2 additions & 2 deletions openevolve/evaluation_result.py
@@ -15,8 +15,8 @@ class EvaluationResult:
     This maintains backward compatibility with the existing dict[str, float] contract
     while adding a side-channel for arbitrary artifacts (text or binary data).

     IMPORTANT: For custom MAP-Elites features, metrics values must be raw continuous
     scores (e.g., actual counts, percentages, continuous measurements), NOT pre-computed
     bin indices. The database handles all binning internally using min-max scaling.

     Examples:
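(The docstring's Examples section is truncated in this view.) Separately, a minimal construction sketch with placeholder values — the artifacts keyword follows the side-channel described above and is an assumption here:

from openevolve.evaluation_result import EvaluationResult

# Metrics carry raw continuous values only; the database bins them itself.
result = EvaluationResult(
    metrics={"combined_score": 0.83, "memory_usage": 142.7},
    artifacts={"stderr": "", "profile": b"\x00\x01"},  # text or binary side-channel
)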
6 changes: 4 additions & 2 deletions openevolve/evaluator.py
@@ -44,7 +44,7 @@ def __init__(
         llm_ensemble: Optional[LLMEnsemble] = None,
         prompt_sampler: Optional[PromptSampler] = None,
         database: Optional[ProgramDatabase] = None,
-        suffix: Optional[str]=".py",
+        suffix: Optional[str] = ".py",
     ):
         self.config = config
         self.evaluation_file = evaluation_file
@@ -565,7 +565,9 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
         # Create prompt for LLM
         feature_dimensions = self.database.config.feature_dimensions if self.database else []
         prompt = self.prompt_sampler.build_prompt(
-            current_program=program_code, template_key="evaluation", feature_dimensions=feature_dimensions
+            current_program=program_code,
+            template_key="evaluation",
+            feature_dimensions=feature_dimensions,
         )

         # Get LLM response
18 changes: 11 additions & 7 deletions openevolve/llm/openai.py
@@ -70,20 +70,24 @@ async def generate_with_context(
         # These models don't support temperature/top_p and use different parameters
         OPENAI_REASONING_MODEL_PREFIXES = (
             # O-series reasoning models
-            "o1-", "o1",  # o1, o1-mini, o1-preview
-            "o3-", "o3",  # o3, o3-mini, o3-pro
-            "o4-",  # o4-mini
+            "o1-",
+            "o1",  # o1, o1-mini, o1-preview
+            "o3-",
+            "o3",  # o3, o3-mini, o3-pro
+            "o4-",  # o4-mini
             # GPT-5 series are also reasoning models
-            "gpt-5-", "gpt-5",  # gpt-5, gpt-5-mini, gpt-5-nano
+            "gpt-5-",
+            "gpt-5",  # gpt-5, gpt-5-mini, gpt-5-nano
             # The GPT OSS series are also reasoning models
-            "gpt-oss-120b", "gpt-oss-20b"
+            "gpt-oss-120b",
+            "gpt-oss-20b",
         )

         # Check if this is an OpenAI reasoning model
         model_lower = str(self.model).lower()
         is_openai_reasoning_model = (
-            self.api_base == "https://api.openai.com/v1" and
-            model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
+            self.api_base == "https://api.openai.com/v1"
+            and model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
         )

         if is_openai_reasoning_model:
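The tuple form works because str.startswith accepts a tuple of prefixes, so one call covers every model family; a standalone sketch of the gating logic (the helper function is hypothetical):

OPENAI_REASONING_MODEL_PREFIXES = (
    "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5", "gpt-oss-120b", "gpt-oss-20b"
)

def is_openai_reasoning_model(api_base: str, model: str) -> bool:
    # Gate on the official endpoint so proxies and self-hosted deployments
    # keep the standard temperature/top_p parameters.
    return api_base == "https://api.openai.com/v1" and str(model).lower().startswith(
        OPENAI_REASONING_MODEL_PREFIXES
    )

assert is_openai_reasoning_model("https://api.openai.com/v1", "o3-mini")
assert not is_openai_reasoning_model("http://localhost:8000/v1", "o3-mini")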