Status: Closed
Labels: question (further information is requested)
Description
To verify whether the evolving process actually helps, I compared it against a parallel ablation baseline in which every iteration prompts the LLM with only the initial program, and the final result is chosen by best-of-N selection. I verified this on the circle_packing problem. Following the same two-stage setting (sketched below), stage 1 starts from the initial program, and stage 2 uses the best program from checkpoint 100 as its initial program. The results show that even without evaluation feedback, history, or the evolution tree in the prompt, the system performs comparably well.
Best Evolve score: 0.9927
Best Parallel score: 0.9924
This ablation study raises doubts about the necessity of incorporating the evolutionary process.
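Roughly, the two-stage setup can be expressed like this. This is only a sketch: two_stage_parallel is a hypothetical wrapper of mine, and reassigning initial_program_code in place simply stands in for however the second stage is restarted with the checkpoint-100 best program as input.

# Sketch of the two-stage best-of-N setup described above.
# `two_stage_parallel` is hypothetical; `run_parallel` is the method shown below.
import asyncio

async def two_stage_parallel(controller, iterations_per_stage: int = 100):
    # Stage 1: N independent rewrites, all prompted with the original program.
    stage1_best = await controller.run_parallel(iterations=iterations_per_stage)

    # Stage 2: treat the stage-1 best (checkpoint 100) as the new initial
    # program and repeat the same independent best-of-N sampling.
    controller.initial_program_code = stage1_best.code
    stage2_best = await controller.run_parallel(iterations=iterations_per_stage)
    return stage2_best

# Usage (assuming `controller` is the configured controller object):
# best = asyncio.run(two_stage_parallel(controller))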
The parallel template is:
PARALLEL_TEMPLATE = '''\
# Current Program
```{language}
{current_program}
```
# Task
Rewrite the program to improve its performance on the specified metrics.
Provide the complete new program code.
IMPORTANT: Make sure your rewritten program maintains the same inputs and outputs
as the original program, but with improved internal implementation.
```{language}
# Your rewritten program here
```
'''

This is the code for the parallel run:
async def run_parallel(
    self,
    iterations: Optional[int] = None,
    target_score: Optional[float] = None,
) -> Program:
    max_iterations = iterations or self.config.max_iterations

    # Always start fresh - no resuming for parallel mode
    start_iteration = 0

    # Add initial program to database
    logger.info("Adding initial program to database for parallel evolution")
    initial_program_id = str(uuid.uuid4())

    # Evaluate the initial program
    initial_metrics = await self.evaluator.evaluate_program(
        self.initial_program_code, initial_program_id
    )
    initial_program = Program(
        id=initial_program_id,
        code=self.initial_program_code,
        language=self.language,
        metrics=initial_metrics,
        iteration_found=start_iteration,
    )

    # Create a simple database for parallel mode (no islands)
    all_programs = [initial_program]
    best_program = initial_program

    logger.info(
        f"Starting parallel evolution for {max_iterations} iterations"
    )
    logger.info(f"Initial program metrics: {format_metrics_safe(initial_metrics)}")

    for i in range(max_iterations):
        logger.info(f"############### PARALLEL_ITERATION {i+1} ############### ")
        iteration_start = time.time()

        # Build prompt always based on initial program (no evolution)
        prompt = self.prompt_sampler.build_prompt(
            current_program=self.initial_program_code,
            parent_program=self.initial_program_code,
            program_metrics=initial_metrics,
            previous_programs=[],  # No previous programs in parallel mode
            top_programs=[],  # No top programs evolution
            language=self.language,
            evolution_round=i,
            allow_full_rewrite=True,  # Always allow full rewrite in parallel mode
            program_artifacts=None,
            template_key="parallel",
        )
        logger.info(
            f"------ PROMPT ------\n"
            f"Iteration {i+1}: Generating independent program from initial code\n"
            f"System prompt: {prompt['system']}\n"
            f"User prompt: {prompt['user']}\n"
            f"------------------"
        )

        # Generate new program independently
        llm_response = await self.llm_ensemble.generate_with_context(
            system_message=prompt["system"],
            messages=[{"role": "user", "content": prompt["user"]}],
        )

        # Parse the response as a full rewrite (always a full rewrite in parallel mode)
        new_code = parse_full_rewrite(llm_response, self.language)
        if not new_code:
            logger.warning(f"Iteration {i+1}: No valid code found in response")
            continue

        # Check code length
        if len(new_code) > self.config.max_code_length:
            logger.warning(
                f"Iteration {i+1}: Generated code exceeds maximum length "
                f"({len(new_code)} > {self.config.max_code_length})"
            )
            continue

        # Evaluate the new program
        program_id = str(uuid.uuid4())
        program_metrics = await self.evaluator.evaluate_program(new_code, program_id)

        # Create new program (no parent-child relationship)
        new_program = Program(
            id=program_id,
            code=new_code,
            language=self.language,
            parent_id=None,  # No parent in parallel mode
            generation=0,  # All programs are generation 0
            metrics=program_metrics,
            iteration_found=i + 1,
            metadata={
                "changes": "Independent generation from initial program",
                "mode": "parallel",
            },
        )

        logger.info("------------------ GENERATED PROGRAM --------------")
        logger.info(f"Generated program {program_id}:")
        logger.info(f"Code:\n{new_code}")
        logger.info("------------------------------------------------")

        # Add to our list of programs
        all_programs.append(new_program)

        # Update best program using best-of-N selection
        if self._is_better_program(new_program, best_program):
            best_program = new_program
            logger.info(
                f"New best solution found at iteration {i+1}: {new_program.id}"
            )
            logger.info(f"Metrics: {format_metrics_safe(new_program.metrics)}")

        # Log progress
        iteration_time = time.time() - iteration_start
        improvement_str = format_improvement_safe(initial_metrics, program_metrics)
        logger.info(
            f"Iteration {i+1}: Generated program {program_id} "
            f"in {iteration_time:.2f}s. Metrics: "
            f"{format_metrics_safe(program_metrics)} "
            f"(Δ from initial: {improvement_str})"
        )

        # Save checkpoint periodically
        if (i + 1) % self.config.checkpoint_interval == 0:
            self._save_parallel_checkpoint(i + 1, all_programs, best_program)

        # Check if target score reached
        if target_score is not None and 'combined_score' in program_metrics:
            if program_metrics['combined_score'] >= target_score:
                logger.info(f"Target score {target_score} reached after {i+1} iterations")
                break

    # Final best-of-N selection
    logger.info(f"Parallel evolution complete. Generated {len(all_programs)} programs total.")
    logger.info(f"Best program: {best_program.id}")
    logger.info(f"Best program metrics: {format_metrics_safe(best_program.metrics)}")

    # Save the best program
    self._save_parallel_best_program(best_program, all_programs)
    return best_program
def _is_better_program(self, program1: Program, program2: Program) -> bool:
    """
    Compare two programs to determine which is better.

    Uses combined_score if available, otherwise falls back to a heuristic.
    """
    metrics1 = program1.metrics
    metrics2 = program2.metrics

    # Primary comparison: combined_score
    if 'combined_score' in metrics1 and 'combined_score' in metrics2:
        return metrics1['combined_score'] > metrics2['combined_score']

    # Fallback: use other metrics (customize based on your evaluation metrics)
    # This is a simple heuristic - adjust based on your specific metrics
    if 'efficiency' in metrics1 and 'efficiency' in metrics2:
        return metrics1['efficiency'] > metrics2['efficiency']

    # Final fallback: assume program1 is not better
    return False
def _save_parallel_checkpoint(
    self, iteration: int, all_programs: List[Program], best_program: Program
) -> None:
    """
    Save a checkpoint for parallel evolution
    """
    checkpoint_dir = os.path.join(self.output_dir, "checkpoints")
    os.makedirs(checkpoint_dir, exist_ok=True)

    # Create specific checkpoint directory
    checkpoint_path = os.path.join(checkpoint_dir, f"parallel_checkpoint_{iteration}")
    os.makedirs(checkpoint_path, exist_ok=True)

    # Save all programs as JSON
    programs_data = []
    for program in all_programs:
        programs_data.append({
            "id": program.id,
            "code": program.code,
            "language": program.language,
            "metrics": program.metrics,
            "iteration_found": program.iteration_found,
            "timestamp": program.timestamp,
            "metadata": program.metadata,
        })
    programs_file = os.path.join(checkpoint_path, "all_programs.json")
    with open(programs_file, "w") as f:
        json.dump(programs_data, f, indent=2)

    # Save the best program at this checkpoint
    if best_program:
        best_program_path = os.path.join(checkpoint_path, f"best_program{self.file_extension}")
        with open(best_program_path, "w") as f:
            f.write(best_program.code)

        # Save metrics
        best_program_info_path = os.path.join(checkpoint_path, "best_program_info.json")
        with open(best_program_info_path, "w") as f:
            json.dump(
                {
                    "id": best_program.id,
                    "metrics": best_program.metrics,
                    "iteration_found": best_program.iteration_found,
                    "current_iteration": iteration,
                    "language": best_program.language,
                    "timestamp": best_program.timestamp,
                    "total_programs": len(all_programs),
                    "saved_at": time.time(),
                },
                f,
                indent=2,
            )

    logger.info(f"Saved parallel checkpoint at iteration {iteration} to {checkpoint_path}")
def _save_parallel_best_program(self, best_program: Program, all_programs: List[Program]) -> None:
    """
    Save the best program from parallel evolution
    """
    best_dir = os.path.join(self.output_dir, "best")
    os.makedirs(best_dir, exist_ok=True)

    # Save best program code
    filename = f"best_program{self.file_extension}"
    code_path = os.path.join(best_dir, filename)
    with open(code_path, "w") as f:
        f.write(best_program.code)

    # Save complete program info
    info_path = os.path.join(best_dir, "best_program_info.json")
    with open(info_path, "w") as f:
        json.dump(
            {
                "id": best_program.id,
                "iteration_found": best_program.iteration_found,
                "timestamp": best_program.timestamp,
                "metrics": best_program.metrics,
                "language": best_program.language,
                "mode": "parallel",
                "total_programs_generated": len(all_programs),
                "saved_at": time.time(),
            },
            f,
            indent=2,
        )

    # Save all programs for analysis
    all_programs_path = os.path.join(best_dir, "all_programs.json")
    programs_data = []
    for program in all_programs:
        programs_data.append({
            "id": program.id,
            "metrics": program.metrics,
            "iteration_found": program.iteration_found,
            "timestamp": program.timestamp,
            "is_best": program.id == best_program.id,
        })
    with open(all_programs_path, "w") as f:
        json.dump(programs_data, f, indent=2)

    logger.info(f"Saved best program to {code_path} with program info to {info_path}")
    logger.info(f"Saved all {len(all_programs)} programs summary to {all_programs_path}")