
Commit 5782888: more config fixes
1 parent 33ccfc1

6 files changed (+237, -72 lines)

configs/default_config.yaml

Lines changed: 4 additions & 8 deletions
@@ -55,9 +55,7 @@ prompt:
     - "I suggest the following improvements:"
     - "We can enhance this code by:"
 
-  # Meta-prompting (experimental)
-  use_meta_prompting: false  # Use LLM to generate parts of the prompt
-  meta_prompt_weight: 0.1  # Weight for meta-prompting influence
+  # Note: meta-prompting features are not yet implemented
 
 # Database configuration
 database:
@@ -80,7 +78,7 @@ database:
   elite_selection_ratio: 0.1  # Ratio of elite programs to select
   exploration_ratio: 0.2  # Ratio of exploration vs exploitation
   exploitation_ratio: 0.7  # Ratio of exploitation vs random selection
-  diversity_metric: "edit_distance"  # Diversity metric (edit_distance, feature_based)
+  # Note: diversity_metric is fixed to "edit_distance" (feature_based not implemented)
 
   # Feature map dimensions for MAP-Elites
   feature_dimensions:  # Dimensions for MAP-Elites feature map
@@ -94,9 +92,7 @@ evaluator:
   timeout: 300  # Maximum evaluation time in seconds
   max_retries: 3  # Maximum number of retries for evaluation
 
-  # Resource limits
-  memory_limit_mb: null  # Memory limit for evaluation (null = no limit)
-  cpu_limit: null  # CPU limit for evaluation (null = no limit)
+  # Note: resource limits (memory_limit_mb, cpu_limit) are not yet implemented
 
   # Evaluation strategies
   cascade_evaluation: true  # Use cascade evaluation to filter bad solutions early
@@ -107,7 +103,7 @@ evaluator:
 
   # Parallel evaluation
   parallel_evaluations: 4  # Number of parallel evaluations
-  distributed: false  # Use distributed evaluation
+  # Note: distributed evaluation is not yet implemented
 
   # LLM-based feedback (experimental)
   use_llm_feedback: false  # Use LLM to evaluate code quality
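Taken together, these edits remove or annotate config keys that currently have no effect (use_meta_prompting, meta_prompt_weight, memory_limit_mb, cpu_limit, distributed) and pin diversity_metric to "edit_distance". A small standalone check, not part of the repo and using only key names taken from the notes above, that flags such keys if they linger in an existing config file:

import yaml
from typing import List

# Keys that this commit comments out of the default config or marks as fixed/not implemented
UNIMPLEMENTED_KEYS = {
    "prompt": ["use_meta_prompting", "meta_prompt_weight"],
    "database": ["diversity_metric"],
    "evaluator": ["memory_limit_mb", "cpu_limit", "distributed"],
}

def find_stale_keys(path: str) -> List[str]:
    with open(path) as f:
        cfg = yaml.safe_load(f) or {}
    stale = []
    for section, keys in UNIMPLEMENTED_KEYS.items():
        section_cfg = cfg.get(section) or {}
        for key in keys:
            if key in section_cfg:
                stale.append(f"{section}.{key}")
    return stale

if __name__ == "__main__":
    # Expected to print [] for the default config after this commit
    print(find_stale_keys("configs/default_config.yaml"))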

configs/island_config_example.yaml

Lines changed: 1 addition & 0 deletions
@@ -30,6 +30,7 @@ database:
   elite_selection_ratio: 0.1
   exploration_ratio: 0.3
   exploitation_ratio: 0.7
+  # Note: diversity_metric fixed to "edit_distance"
 
   # Feature map dimensions for MAP-Elites
   feature_dimensions: ["score", "complexity"]

openevolve/config.py

Lines changed: 10 additions & 6 deletions
@@ -195,8 +195,9 @@ def to_dict(self) -> Dict[str, Any]:
                 "num_diverse_programs": self.prompt.num_diverse_programs,
                 "use_template_stochasticity": self.prompt.use_template_stochasticity,
                 "template_variations": self.prompt.template_variations,
-                "use_meta_prompting": self.prompt.use_meta_prompting,
-                "meta_prompt_weight": self.prompt.meta_prompt_weight,
+                # Note: meta-prompting features not implemented
+                # "use_meta_prompting": self.prompt.use_meta_prompting,
+                # "meta_prompt_weight": self.prompt.meta_prompt_weight,
             },
             "database": {
                 "db_path": self.database.db_path,
@@ -207,7 +208,8 @@ def to_dict(self) -> Dict[str, Any]:
                 "elite_selection_ratio": self.database.elite_selection_ratio,
                 "exploration_ratio": self.database.exploration_ratio,
                 "exploitation_ratio": self.database.exploitation_ratio,
-                "diversity_metric": self.database.diversity_metric,
+                # Note: diversity_metric fixed to "edit_distance"
+                # "diversity_metric": self.database.diversity_metric,
                 "feature_dimensions": self.database.feature_dimensions,
                 "feature_bins": self.database.feature_bins,
                 "migration_interval": self.database.migration_interval,
@@ -217,12 +219,14 @@ def to_dict(self) -> Dict[str, Any]:
             "evaluator": {
                 "timeout": self.evaluator.timeout,
                 "max_retries": self.evaluator.max_retries,
-                "memory_limit_mb": self.evaluator.memory_limit_mb,
-                "cpu_limit": self.evaluator.cpu_limit,
+                # Note: resource limits not implemented
+                # "memory_limit_mb": self.evaluator.memory_limit_mb,
+                # "cpu_limit": self.evaluator.cpu_limit,
                 "cascade_evaluation": self.evaluator.cascade_evaluation,
                 "cascade_thresholds": self.evaluator.cascade_thresholds,
                 "parallel_evaluations": self.evaluator.parallel_evaluations,
-                "distributed": self.evaluator.distributed,
+                # Note: distributed evaluation not implemented
+                # "distributed": self.evaluator.distributed,
                 "use_llm_feedback": self.evaluator.use_llm_feedback,
                 "llm_feedback_weight": self.evaluator.llm_feedback_weight,
             },
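Because the unimplemented fields are now commented out of to_dict(), a config serialized from that dict no longer carries them. A hedged usage sketch, assuming a loaded Config instance named config with the to_dict() method shown above; dump_config is a hypothetical helper, not a repo API:

import yaml

def dump_config(config, path: str) -> None:
    # Serialize the config via its to_dict() representation
    with open(path, "w") as f:
        yaml.safe_dump(config.to_dict(), f, default_flow_style=False)

# dump_config(config, "my_run_config.yaml")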

openevolve/database.py

Lines changed: 124 additions & 21 deletions
@@ -130,6 +130,9 @@ def add(
 
         self.programs[program.id] = program
 
+        # Enforce population size limit
+        self._enforce_population_limit()
+
         # Calculate feature coordinates for MAP-Elites
         feature_coords = self._calculate_feature_coords(program)
 
@@ -552,25 +555,23 @@ def _sample_parent(self) -> Program:
         Returns:
             Parent program from current island
         """
-        # Decide between exploitation and exploration
-        if random.random() < self.config.exploitation_ratio and self.archive:
-            # Even for exploitation, prefer programs from current island
-            archive_programs_in_island = [
-                pid
-                for pid in self.archive
-                if pid in self.programs
-                and self.programs[pid].metadata.get("island") == self.current_island
-            ]
-
-            if archive_programs_in_island:
-                parent_id = random.choice(archive_programs_in_island)
-                return self.programs[parent_id]
-            else:
-                # Fall back to any archive program if current island has none
-                parent_id = random.choice(list(self.archive))
-                return self.programs[parent_id]
+        # Use exploration_ratio and exploitation_ratio to decide sampling strategy
+        rand_val = random.random()
+
+        if rand_val < self.config.exploration_ratio:
+            # EXPLORATION: Sample from current island (diverse sampling)
+            return self._sample_exploration_parent()
+        elif rand_val < self.config.exploration_ratio + self.config.exploitation_ratio:
+            # EXPLOITATION: Sample from archive (elite programs)
+            return self._sample_exploitation_parent()
+        else:
+            # RANDOM: Sample from any program (remaining probability)
+            return self._sample_random_parent()
 
-        # Exploration: Sample from current island only
+    def _sample_exploration_parent(self) -> Program:
+        """
+        Sample a parent for exploration (from current island)
+        """
         current_island_programs = self.islands[self.current_island]
 
         if not current_island_programs:
@@ -589,6 +590,41 @@ def _sample_parent(self) -> Program:
         # Sample from current island
         parent_id = random.choice(list(current_island_programs))
         return self.programs[parent_id]
+
+    def _sample_exploitation_parent(self) -> Program:
+        """
+        Sample a parent for exploitation (from archive/elite programs)
+        """
+        if not self.archive:
+            # Fallback to exploration if no archive
+            return self._sample_exploration_parent()
+
+        # Prefer programs from current island in archive
+        archive_programs_in_island = [
+            pid
+            for pid in self.archive
+            if pid in self.programs
+            and self.programs[pid].metadata.get("island") == self.current_island
+        ]
+
+        if archive_programs_in_island:
+            parent_id = random.choice(archive_programs_in_island)
+            return self.programs[parent_id]
+        else:
+            # Fall back to any archive program if current island has none
+            parent_id = random.choice(list(self.archive))
+            return self.programs[parent_id]
+
+    def _sample_random_parent(self) -> Program:
+        """
+        Sample a completely random parent from all programs
+        """
+        if not self.programs:
+            raise ValueError("No programs available for sampling")
+
+        # Sample randomly from all programs
+        program_id = random.choice(list(self.programs.keys()))
+        return self.programs[program_id]
 
     def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
         """
@@ -616,14 +652,17 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
             if program.id not in [p.id for p in inspirations] and program.id != parent.id:
                 inspirations.append(program)
 
-        # Add diverse programs
+        # Add diverse programs using config.num_diverse_programs
         if len(self.programs) > n and len(inspirations) < n:
-            # Sample from different feature cells
+            # Calculate how many diverse programs to add (up to remaining slots)
+            remaining_slots = n - len(inspirations)
+
+            # Sample from different feature cells for diversity
            feature_coords = self._calculate_feature_coords(parent)
 
             # Get programs from nearby feature cells
             nearby_programs = []
-            for _ in range(n - len(inspirations)):
+            for _ in range(remaining_slots):
                 # Perturb coordinates
                 perturbed_coords = [
                     max(0, min(self.feature_bins - 1, c + random.randint(-1, 1)))
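The diverse-inspiration step looks for neighbours of the parent's MAP-Elites cell by nudging each feature coordinate by at most one bin. A small sketch of that perturbation with hypothetical names, assuming integer cell coordinates clamped to [0, feature_bins - 1]:

import random
from typing import List

def perturb_coords(coords: List[int], feature_bins: int) -> List[int]:
    # Shift each dimension by -1, 0, or +1 and clamp into the grid
    return [max(0, min(feature_bins - 1, c + random.randint(-1, 1))) for c in coords]

print(perturb_coords([0, 5, 9], feature_bins=10))  # e.g. [0, 4, 9]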
@@ -657,6 +696,70 @@ def _sample_inspirations(self, parent: Program, n: int = 5) -> List[Program]:
 
         return inspirations[:n]
 
+    def _enforce_population_limit(self) -> None:
+        """
+        Enforce the population size limit by removing worst programs if needed
+        """
+        if len(self.programs) <= self.config.population_size:
+            return
+
+        # Calculate how many programs to remove
+        num_to_remove = len(self.programs) - self.config.population_size
+
+        logger.info(f"Population size ({len(self.programs)}) exceeds limit ({self.config.population_size}), removing {num_to_remove} programs")
+
+        # Get programs sorted by fitness (worst first)
+        all_programs = list(self.programs.values())
+
+        # Sort by average metric (worst first)
+        sorted_programs = sorted(
+            all_programs,
+            key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0
+        )
+
+        # Remove worst programs, but never remove the best program
+        programs_to_remove = []
+        for program in sorted_programs:
+            if len(programs_to_remove) >= num_to_remove:
+                break
+            # Don't remove the best program
+            if program.id != self.best_program_id:
+                programs_to_remove.append(program)
+
+        # If we still need to remove more and only have the best program protected,
+        # remove from the remaining programs anyway (but keep the absolute best)
+        if len(programs_to_remove) < num_to_remove:
+            remaining_programs = [p for p in sorted_programs if p not in programs_to_remove and p.id != self.best_program_id]
+            additional_removals = remaining_programs[:num_to_remove - len(programs_to_remove)]
+            programs_to_remove.extend(additional_removals)
+
+        # Remove the selected programs
+        for program in programs_to_remove:
+            program_id = program.id
+
+            # Remove from main programs dict
+            if program_id in self.programs:
+                del self.programs[program_id]
+
+            # Remove from feature map
+            keys_to_remove = []
+            for key, pid in self.feature_map.items():
+                if pid == program_id:
+                    keys_to_remove.append(key)
+            for key in keys_to_remove:
+                del self.feature_map[key]
+
+            # Remove from islands
+            for island in self.islands:
+                island.discard(program_id)
+
+            # Remove from archive
+            self.archive.discard(program_id)
+
+            logger.debug(f"Removed program {program_id} due to population limit")
+
+        logger.info(f"Population size after cleanup: {len(self.programs)}")
+
     # Island management methods
     def set_current_island(self, island_idx: int) -> None:
         """Set which island is currently being evolved"""

openevolve/evaluator.py

Lines changed: 47 additions & 36 deletions
@@ -89,46 +89,57 @@ async def evaluate_program(
             Dictionary of metric name to score
         """
         start_time = time.time()
+        program_id_str = f" {program_id}" if program_id else ""
+
+        # Retry logic for evaluation
+        last_exception = None
+        for attempt in range(self.config.max_retries + 1):
+            # Create a temporary file for the program
+            with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
+                temp_file.write(program_code.encode("utf-8"))
+                temp_file_path = temp_file.name
 
-        # Create a temporary file for the program
-        with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
-            temp_file.write(program_code.encode("utf-8"))
-            temp_file_path = temp_file.name
+            try:
+                # Run evaluation
+                if self.config.cascade_evaluation:
+                    # Run cascade evaluation
+                    metrics = await self._cascade_evaluate(temp_file_path)
+                else:
+                    # Run direct evaluation
+                    metrics = await self._direct_evaluate(temp_file_path)
 
-        try:
-            # Run evaluation
-            if self.config.cascade_evaluation:
-                # Run cascade evaluation
-                metrics = await self._cascade_evaluate(temp_file_path)
-            else:
-                # Run direct evaluation
-                metrics = await self._direct_evaluate(temp_file_path)
-
-            # Add LLM feedback if configured
-            if self.config.use_llm_feedback and self.llm_ensemble:
-                feedback_metrics = await self._llm_evaluate(program_code)
-
-                # Combine metrics
-                for name, value in feedback_metrics.items():
-                    metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
-
-            elapsed = time.time() - start_time
-            program_id_str = f" {program_id}" if program_id else ""
-            logger.info(
-                f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
-                f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
-            )
-
-            return metrics
+                # Add LLM feedback if configured
+                if self.config.use_llm_feedback and self.llm_ensemble:
+                    feedback_metrics = await self._llm_evaluate(program_code)
 
-        except Exception as e:
-            logger.error(f"Error evaluating program: {str(e)}")
-            return {"error": 0.0}
+                    # Combine metrics
+                    for name, value in feedback_metrics.items():
+                        metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
 
-        finally:
-            # Clean up temporary file
-            if os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)
+                elapsed = time.time() - start_time
+                logger.info(
+                    f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
+                    f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
+                )
+
+                return metrics
+
+            except Exception as e:
+                last_exception = e
+                logger.warning(f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}")
+
+                # If this is not the last attempt, wait a bit before retrying
+                if attempt < self.config.max_retries:
+                    await asyncio.sleep(1.0)  # Wait 1 second before retry
+
+            finally:
+                # Clean up temporary file
+                if os.path.exists(temp_file_path):
+                    os.unlink(temp_file_path)
+
+        # All retries failed
+        logger.error(f"All evaluation attempts failed for program{program_id_str}. Last error: {str(last_exception)}")
+        return {"error": 0.0}
 
     @run_in_executor
     def _direct_evaluate(self, program_path: str) -> Dict[str, float]:
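evaluate_program now wraps the whole evaluation in a retry loop: up to max_retries + 1 attempts, a warning per failed attempt, a fixed one-second pause between attempts, and {"error": 0.0} only after every attempt fails. The same pattern, distilled into a generic async helper (a sketch, not part of the commit; retry_async is a hypothetical name):

import asyncio
import logging

logger = logging.getLogger(__name__)

async def retry_async(func, max_retries: int, delay: float = 1.0):
    """Run `await func()` up to max_retries + 1 times, returning None if all attempts fail."""
    last_exception = None
    for attempt in range(max_retries + 1):
        try:
            return await func()
        except Exception as e:
            last_exception = e
            logger.warning(f"Attempt {attempt + 1}/{max_retries + 1} failed: {e}")
            if attempt < max_retries:
                await asyncio.sleep(delay)  # fixed delay between attempts
    logger.error(f"All attempts failed. Last error: {last_exception}")
    return None

# Example (inside an async context, assuming an Evaluator instance):
# metrics = await retry_async(lambda: evaluator.evaluate_program(code, "prog-1"), max_retries=3)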
