Skip to content

Commit 2c7fa8f

Browse files
committed
f
1 parent 4d15932 commit 2c7fa8f

File tree

10 files changed

+177
-131
lines changed

10 files changed

+177
-131
lines changed

openevolve/config.py

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -73,8 +73,7 @@ def __post_init__(self):
7373
if self.primary_model:
7474
# Create primary model
7575
primary_model = LLMModelConfig(
76-
name=self.primary_model,
77-
weight=self.primary_model_weight or 1.0
76+
name=self.primary_model, weight=self.primary_model_weight or 1.0
7877
)
7978
self.models.append(primary_model)
8079

@@ -83,14 +82,22 @@ def __post_init__(self):
8382
if self.secondary_model_weight is None or self.secondary_model_weight > 0:
8483
secondary_model = LLMModelConfig(
8584
name=self.secondary_model,
86-
weight=self.secondary_model_weight if self.secondary_model_weight is not None else 0.2
85+
weight=(
86+
self.secondary_model_weight
87+
if self.secondary_model_weight is not None
88+
else 0.2
89+
),
8790
)
8891
self.models.append(secondary_model)
8992

9093
# Only validate if this looks like a user config (has some model info)
9194
# Don't validate during internal/default initialization
92-
if (self.primary_model or self.secondary_model or
93-
self.primary_model_weight or self.secondary_model_weight) and not self.models:
95+
if (
96+
self.primary_model
97+
or self.secondary_model
98+
or self.primary_model_weight
99+
or self.secondary_model_weight
100+
) and not self.models:
94101
raise ValueError(
95102
"No LLM models configured. Please specify 'models' array or "
96103
"'primary_model' in your configuration."
@@ -198,11 +205,11 @@ class DatabaseConfig:
198205
default_factory=lambda: ["complexity", "diversity"],
199206
metadata={
200207
"help": "List of feature dimensions for MAP-Elites grid. "
201-
"Built-in dimensions: 'complexity', 'diversity', 'score'. "
202-
"Custom dimensions: Must match metric names from evaluator. "
203-
"IMPORTANT: Evaluators must return raw continuous values for custom dimensions, "
204-
"NOT pre-computed bin indices. OpenEvolve handles all scaling and binning internally."
205-
}
208+
"Built-in dimensions: 'complexity', 'diversity', 'score'. "
209+
"Custom dimensions: Must match metric names from evaluator. "
210+
"IMPORTANT: Evaluators must return raw continuous values for custom dimensions, "
211+
"NOT pre-computed bin indices. OpenEvolve handles all scaling and binning internally."
212+
},
206213
)
207214
feature_bins: Union[int, Dict[str, int]] = 10 # Can be int (all dims) or dict (per-dim)
208215
diversity_reference_size: int = 20 # Size of reference set for diversity calculation
@@ -271,7 +278,7 @@ class Config:
271278
# Evolution settings
272279
diff_based_evolution: bool = True
273280
max_code_length: int = 10000
274-
281+
275282
# Early stopping settings
276283
early_stopping_patience: Optional[int] = None
277284
convergence_threshold: float = 0.001

openevolve/controller.py

Lines changed: 18 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -353,10 +353,20 @@ def force_exit_handler(signum, frame):
353353
best_program = best_by_combined
354354

355355
if best_program:
356-
logger.info(
357-
f"Evolution complete. Best program has metrics: "
358-
f"{format_metrics_safe(best_program.metrics)}"
359-
)
356+
if (
357+
hasattr(self, "parallel_controller")
358+
and self.parallel_controller
359+
and self.parallel_controller.early_stopping_triggered
360+
):
361+
logger.info(
362+
f"🛑 Evolution complete via early stopping. Best program has metrics: "
363+
f"{format_metrics_safe(best_program.metrics)}"
364+
)
365+
else:
366+
logger.info(
367+
f"Evolution complete. Best program has metrics: "
368+
f"{format_metrics_safe(best_program.metrics)}"
369+
)
360370
self._save_best_program(best_program)
361371
return best_program
362372
else:
@@ -467,10 +477,13 @@ async def _run_evolution_with_checkpoints(
467477
start_iteration, max_iterations, target_score, checkpoint_callback=self._save_checkpoint
468478
)
469479

470-
# Check if shutdown was requested
480+
# Check if shutdown or early stopping was triggered
471481
if self.parallel_controller.shutdown_event.is_set():
472482
logger.info("Evolution stopped due to shutdown request")
473483
return
484+
elif self.parallel_controller.early_stopping_triggered:
485+
logger.info("Evolution stopped due to early stopping - saving final checkpoint")
486+
# Continue to save final checkpoint for early stopping
474487

475488
# Save final checkpoint if needed
476489
# Note: start_iteration here is the evolution start (1 for fresh start, not 0)

openevolve/database.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,9 @@ def add(
248248
if existing_program_id in self.programs:
249249
existing_program = self.programs[existing_program_id]
250250
new_fitness = get_fitness_score(program.metrics, self.config.feature_dimensions)
251-
existing_fitness = get_fitness_score(existing_program.metrics, self.config.feature_dimensions)
251+
existing_fitness = get_fitness_score(
252+
existing_program.metrics, self.config.feature_dimensions
253+
)
252254
logger.info(
253255
"MAP-Elites cell improved: %s (fitness: %.3f -> %.3f)",
254256
coords_dict,
@@ -290,7 +292,7 @@ def add(
290292
else:
291293
# No parent and no target specified, use current island
292294
island_idx = self.current_island
293-
295+
294296
island_idx = island_idx % len(self.islands) # Ensure valid island
295297
self.islands[island_idx].add(program.id)
296298

@@ -547,7 +549,7 @@ def load(self, path: str) -> None:
547549
self.current_island = metadata.get("current_island", 0)
548550
self.island_generations = metadata.get("island_generations", [0] * len(saved_islands))
549551
self.last_migration_generation = metadata.get("last_migration_generation", 0)
550-
552+
551553
# Load feature_stats for MAP-Elites grid stability
552554
self.feature_stats = self._deserialize_feature_stats(metadata.get("feature_stats", {}))
553555

@@ -839,7 +841,7 @@ def _feature_coords_to_key(self, coords: List[int]) -> str:
839841
def _is_better(self, program1: Program, program2: Program) -> bool:
840842
"""
841843
Determine if program1 has better FITNESS than program2
842-
844+
843845
Uses fitness calculation that excludes MAP-Elites feature dimensions
844846
to prevent pollution of fitness comparisons.
845847
@@ -901,7 +903,8 @@ def _update_archive(self, program: Program) -> None:
901903
# Find worst program among valid programs
902904
if valid_archive_programs:
903905
worst_program = min(
904-
valid_archive_programs, key=lambda p: get_fitness_score(p.metrics, self.config.feature_dimensions)
906+
valid_archive_programs,
907+
key=lambda p: get_fitness_score(p.metrics, self.config.feature_dimensions),
905908
)
906909

907910
# Replace if new program is better
@@ -1848,7 +1851,7 @@ def _scale_feature_value_minmax(self, feature_name: str, value: float) -> float:
18481851
def _serialize_feature_stats(self) -> Dict[str, Any]:
18491852
"""
18501853
Serialize feature_stats for JSON storage
1851-
1854+
18521855
Returns:
18531856
Dictionary that can be JSON-serialized
18541857
"""
@@ -1866,26 +1869,28 @@ def _serialize_feature_stats(self) -> Dict[str, Any]:
18661869
serialized_stats[key] = value
18671870
else:
18681871
# Convert numpy types to Python native types
1869-
if hasattr(value, 'item'): # numpy scalar
1872+
if hasattr(value, "item"): # numpy scalar
18701873
serialized_stats[key] = value.item()
18711874
else:
18721875
serialized_stats[key] = value
18731876
serialized[feature_name] = serialized_stats
18741877
return serialized
1875-
1876-
def _deserialize_feature_stats(self, stats_dict: Dict[str, Any]) -> Dict[str, Dict[str, Union[float, List[float]]]]:
1878+
1879+
def _deserialize_feature_stats(
1880+
self, stats_dict: Dict[str, Any]
1881+
) -> Dict[str, Dict[str, Union[float, List[float]]]]:
18771882
"""
18781883
Deserialize feature_stats from loaded JSON
1879-
1884+
18801885
Args:
18811886
stats_dict: Dictionary loaded from JSON
1882-
1887+
18831888
Returns:
18841889
Properly formatted feature_stats dictionary
18851890
"""
18861891
if not stats_dict:
18871892
return {}
1888-
1893+
18891894
deserialized = {}
18901895
for feature_name, stats in stats_dict.items():
18911896
if isinstance(stats, dict):
@@ -1897,8 +1902,10 @@ def _deserialize_feature_stats(self, stats_dict: Dict[str, Any]) -> Dict[str, Di
18971902
}
18981903
deserialized[feature_name] = deserialized_stats
18991904
else:
1900-
logger.warning(f"Skipping malformed feature_stats entry for '{feature_name}': {stats}")
1901-
1905+
logger.warning(
1906+
f"Skipping malformed feature_stats entry for '{feature_name}': {stats}"
1907+
)
1908+
19021909
return deserialized
19031910

19041911
def log_island_status(self) -> None:

openevolve/evaluation_result.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@ class EvaluationResult:
1515
This maintains backward compatibility with the existing dict[str, float] contract
1616
while adding a side-channel for arbitrary artifacts (text or binary data).
1717
18-
IMPORTANT: For custom MAP-Elites features, metrics values must be raw continuous
19-
scores (e.g., actual counts, percentages, continuous measurements), NOT pre-computed
18+
IMPORTANT: For custom MAP-Elites features, metrics values must be raw continuous
19+
scores (e.g., actual counts, percentages, continuous measurements), NOT pre-computed
2020
bin indices. The database handles all binning internally using min-max scaling.
2121
2222
Examples:

openevolve/evaluator.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def __init__(
4444
llm_ensemble: Optional[LLMEnsemble] = None,
4545
prompt_sampler: Optional[PromptSampler] = None,
4646
database: Optional[ProgramDatabase] = None,
47-
suffix: Optional[str]=".py",
47+
suffix: Optional[str] = ".py",
4848
):
4949
self.config = config
5050
self.evaluation_file = evaluation_file
@@ -565,7 +565,9 @@ async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[s
565565
# Create prompt for LLM
566566
feature_dimensions = self.database.config.feature_dimensions if self.database else []
567567
prompt = self.prompt_sampler.build_prompt(
568-
current_program=program_code, template_key="evaluation", feature_dimensions=feature_dimensions
568+
current_program=program_code,
569+
template_key="evaluation",
570+
feature_dimensions=feature_dimensions,
569571
)
570572

571573
# Get LLM response

openevolve/llm/openai.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -70,20 +70,24 @@ async def generate_with_context(
7070
# These models don't support temperature/top_p and use different parameters
7171
OPENAI_REASONING_MODEL_PREFIXES = (
7272
# O-series reasoning models
73-
"o1-", "o1", # o1, o1-mini, o1-preview
74-
"o3-", "o3", # o3, o3-mini, o3-pro
75-
"o4-", # o4-mini
73+
"o1-",
74+
"o1", # o1, o1-mini, o1-preview
75+
"o3-",
76+
"o3", # o3, o3-mini, o3-pro
77+
"o4-", # o4-mini
7678
# GPT-5 series are also reasoning models
77-
"gpt-5-", "gpt-5", # gpt-5, gpt-5-mini, gpt-5-nano
79+
"gpt-5-",
80+
"gpt-5", # gpt-5, gpt-5-mini, gpt-5-nano
7881
# The GPT OSS series are also reasoning models
79-
"gpt-oss-120b", "gpt-oss-20b"
82+
"gpt-oss-120b",
83+
"gpt-oss-20b",
8084
)
8185

8286
# Check if this is an OpenAI reasoning model
8387
model_lower = str(self.model).lower()
8488
is_openai_reasoning_model = (
85-
self.api_base == "https://api.openai.com/v1" and
86-
model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
89+
self.api_base == "https://api.openai.com/v1"
90+
and model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
8791
)
8892

8993
if is_openai_reasoning_model:

0 commit comments

Comments
 (0)