
Commit 0490d6e

Prompt logging to database added
1 parent ef07099 commit 0490d6e


5 files changed, +109 -6 lines


configs/default_config.yaml

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ database:
   # General settings
   db_path: null # Path to persist database (null = in-memory only)
   in_memory: true # Keep database in memory for faster access
+  log_prompts: true # If true, log all prompts and responses into the database
 
   # Evolutionary parameters
   population_size: 1000 # Maximum number of programs to keep in memory
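
The flag defaults to true, so prompt and response logging is on unless a config overrides it. A minimal sketch of reading and toggling it programmatically (load_config's exact signature is an assumption based on the controller import below; the attribute path follows the DatabaseConfig change in the next file):

from openevolve.config import load_config

# Sketch only: load_config is assumed to accept a path to a YAML config file.
config = load_config("configs/default_config.yaml")
print(config.database.log_prompts)   # True by default after this commit
config.database.log_prompts = False  # opt out of prompt/response logging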

openevolve/config.py

Lines changed: 4 additions & 0 deletions
@@ -142,6 +142,9 @@ class DatabaseConfig:
     db_path: Optional[str] = None # Path to store database on disk
     in_memory: bool = True
 
+    # Prompt and response logging to programs/<id>.json
+    log_prompts: bool = True
+
     # Evolutionary parameters
     population_size: int = 1000
     archive_size: int = 100
@@ -308,6 +311,7 @@ def to_dict(self) -> Dict[str, Any]:
                 "migration_interval": self.database.migration_interval,
                 "migration_rate": self.database.migration_rate,
                 "random_seed": self.database.random_seed,
+                "log_prompts": self.database.log_prompts,
             },
             "evaluator": {
                 "timeout": self.evaluator.timeout,

openevolve/controller.py

Lines changed: 24 additions & 1 deletion
@@ -9,7 +9,8 @@
 import time
 import uuid
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
+import traceback
 
 from openevolve.config import Config, load_config
 from openevolve.database import Program, ProgramDatabase
@@ -114,6 +115,7 @@ def __init__(
             evaluation_file,
             self.llm_evaluator_ensemble,
             self.evaluator_prompt_sampler,
+            database=self.database,
         )
 
         logger.info(f"Initialized OpenEvolve with {initial_program_path} " f"and {evaluation_file}")
@@ -307,10 +309,30 @@ async def run(
                 # Add to database (will be added to current island)
                 self.database.add(child_program, iteration=i + 1)
 
+                # Log prompts
+                self.database.log_prompt(
+                    template_key=(
+                        "full_rewrite_user" if self.config.allow_full_rewrites else "diff_user"
+                    ),
+                    program_id=child_id,
+                    prompt=prompt,
+                    responses=[llm_response],
+                )
+
                 # Store artifacts if they exist
                 if artifacts:
                     self.database.store_artifacts(child_id, artifacts)
 
+                # Log prompts
+                self.database.log_prompt(
+                    template_key=(
+                        "full_rewrite_user" if self.config.allow_full_rewrites else "diff_user"
+                    ),
+                    program_id=child_id,
+                    prompt=prompt,
+                    responses=[llm_response],
+                )
+
                 # Increment generation for current island
                 self.database.increment_island_generation()
 
@@ -347,6 +369,7 @@ async def run(
 
             except Exception as e:
                 logger.error(f"Error in iteration {i+1}: {str(e)}")
+                traceback.print_exc()
                 continue
 
         # Get the best program using our tracking mechanism
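
For reference, the prompt logged here is the plain {"system": ..., "user": ...} mapping built by the prompt sampler; a minimal sketch of the call site (system_message, user_message, and llm_response are illustrative names, not taken from this diff):

# Sketch only: the prompt dict shape follows log_prompt's usage in database.py below.
prompt = {"system": system_message, "user": user_message}
self.database.log_prompt(
    template_key="full_rewrite_user" if self.config.allow_full_rewrites else "diff_user",
    program_id=child_id,
    prompt=prompt,
    responses=[llm_response],
)

Note that log_prompt attaches the responses list to the same dict (prompt["responses"] = responses), so the stored entry carries both the prompt and the model output.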

openevolve/database.py

Lines changed: 55 additions & 3 deletions
@@ -104,6 +104,9 @@ def __init__(self, config: DatabaseConfig):
         if config.db_path and os.path.exists(config.db_path):
             self.load(config.db_path)
 
+        # Prompt log
+        self.prompts_by_program: Dict[str, Dict[str, Dict[str, str]]] = None
+
         # Set random seed for reproducible sampling if specified
         if config.random_seed is not None:
             import random
@@ -314,7 +317,14 @@ def save(self, path: Optional[str] = None, iteration: int = 0) -> None:
 
         # Save each program
         for program in self.programs.values():
-            self._save_program(program, save_path)
+            prompts = None
+            if (
+                self.config.log_prompts
+                and self.prompts_by_program
+                and program.id in self.prompts_by_program
+            ):
+                prompts = self.prompts_by_program[program.id]
+            self._save_program(program, save_path, prompts=prompts)
 
         # Save metadata
         metadata = {
@@ -382,13 +392,19 @@ def load(self, path: str) -> None:
 
         logger.info(f"Loaded database with {len(self.programs)} programs from {path}")
 
-    def _save_program(self, program: Program, base_path: Optional[str] = None) -> None:
+    def _save_program(
+        self,
+        program: Program,
+        base_path: Optional[str] = None,
+        prompts: Optional[Dict[str, Dict[str, str]]] = None,
+    ) -> None:
         """
         Save a program to disk
 
         Args:
             program: Program to save
             base_path: Base path to save to (uses config.db_path if None)
+            prompts: Optional prompts to save with the program, in the format {template_key: { 'system': str, 'user': str }}
         """
         save_path = base_path or self.config.db_path
         if not save_path:
@@ -399,9 +415,13 @@ def _save_program(self, program: Program, base_path: Optional[str] = None) -> No
         os.makedirs(programs_dir, exist_ok=True)
 
         # Save program
+        program_dict = program.to_dict()
+        if prompts:
+            program_dict["prompts"] = prompts
         program_path = os.path.join(programs_dir, f"{program.id}.json")
+
         with open(program_path, "w") as f:
-            json.dump(program.to_dict(), f)
+            json.dump(program_dict, f)
 
     def _calculate_feature_coords(self, program: Program) -> List[int]:
         """
@@ -1079,3 +1099,35 @@ def _load_artifact_dir(self, artifact_dir: str) -> Dict[str, Union[str, bytes]]:
             logger.warning(f"Failed to list artifact directory {artifact_dir}: {e}")
 
         return artifacts
+
+    def log_prompt(
+        self,
+        program_id: str,
+        template_key: str,
+        prompt: Dict[str, str],
+        responses: Optional[List[str]] = None,
+    ) -> None:
+        """
+        Log a prompt for a program.
+        Only logs if self.config.log_prompts is True.
+
+        Args:
+            program_id: ID of the program to log the prompt for
+            template_key: Key for the prompt template
+            prompt: Prompts in the format {template_key: { 'system': str, 'user': str }}.
+            responses: Optional list of responses to the prompt, if available.
+        """
+
+        if not self.config.log_prompts:
+            return
+
+        if responses is None:
+            responses = []
+        prompt["responses"] = responses
+
+        if self.prompts_by_program is None:
+            self.prompts_by_program = {}
+
+        if program_id not in self.prompts_by_program:
+            self.prompts_by_program[program_id] = {}
+        self.prompts_by_program[program_id][template_key] = prompt
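
Putting _save_program and log_prompt together, a saved programs/<id>.json record gains a "prompts" section; an illustrative sketch of its shape (the template keys and the "responses" field follow the code above, while the surrounding program fields and all values are placeholders):

# Illustrative only: the real program fields come from Program.to_dict().
program_record = {
    "id": "8c1f...",  # placeholder program id
    "code": "...",    # usual Program fields
    "prompts": {
        "diff_user": {
            "system": "...system prompt...",
            "user": "...user prompt...",
            "responses": ["...raw LLM response..."],
        },
        "evaluation": {
            "system": "...evaluator system prompt...",
            "user": "...evaluator user prompt...",
            "responses": ["...LLM feedback..."],
        },
    },
}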

openevolve/evaluator.py

Lines changed: 25 additions & 2 deletions
@@ -18,7 +18,9 @@
 import traceback
 
 from openevolve.config import EvaluatorConfig
+from openevolve.database import ProgramDatabase
 from openevolve.evaluation_result import EvaluationResult
+from openevolve.database import ProgramDatabase
 from openevolve.llm.ensemble import LLMEnsemble
 from openevolve.utils.async_utils import TaskPool, run_in_executor
 from openevolve.prompt.sampler import PromptSampler
@@ -41,11 +43,13 @@ def __init__(
         evaluation_file: str,
         llm_ensemble: Optional[LLMEnsemble] = None,
         prompt_sampler: Optional[PromptSampler] = None,
+        database: Optional[ProgramDatabase] = None,
     ):
         self.config = config
         self.evaluation_file = evaluation_file
         self.llm_ensemble = llm_ensemble
         self.prompt_sampler = prompt_sampler
+        self.database = database
 
         # Create a task pool for parallel evaluation
         self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations)
@@ -127,7 +131,7 @@ async def evaluate_program(
                 # Add LLM feedback if configured
                 llm_eval_result = None
                 if self.config.use_llm_feedback and self.llm_ensemble:
-                    llm_result = await self._llm_evaluate(program_code)
+                    llm_result = await self._llm_evaluate(program_code, program_id=program_id)
                     llm_eval_result = self._process_evaluation_result(llm_result)
 
                 # Combine metrics
@@ -148,9 +152,17 @@ async def evaluate_program(
                 # Merge eval_result artifacts with llm artifacts if they exist
                 if eval_result.has_artifacts():
                     self._pending_artifacts[program_id].update(eval_result.artifacts)
+                    logger.debug(
+                        f"Program{program_id_str} returned artifacts: "
+                        f"{eval_result.artifacts}"
+                    )
 
                 if llm_eval_result and llm_eval_result.has_artifacts():
                     self._pending_artifacts[program_id].update(llm_eval_result.artifacts)
+                    logger.debug(
+                        f"Program{program_id_str} returned LLM artifacts: "
+                        f"{llm_eval_result.artifacts}"
+                    )
 
                 elapsed = time.time() - start_time
                 logger.info(
@@ -166,6 +178,7 @@ async def evaluate_program(
                 logger.warning(
                     f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}"
                 )
+                traceback.print_exc()
 
                 # Capture failure artifacts if enabled
                 if artifacts_enabled and program_id:
@@ -382,12 +395,13 @@ async def _cascade_evaluate(
             },
         )
 
-    async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:
+    async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[str, float]:
        """
         Use LLM to evaluate code quality
 
         Args:
             program_code: Code to evaluate
+            program_id: Optional ID for logging
 
         Returns:
             Dictionary of metric name to score
@@ -406,6 +420,15 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:
             prompt["system"], [{"role": "user", "content": prompt["user"]}]
         )
 
+        # Log prompt and response to database
+        if self.database and program_id:
+            self.database.log_prompt(
+                program_id=program_id,
+                template_key="evaluation",
+                prompt=prompt,
+                responses=responses,
+            )
+
         # Extract JSON from response
         try:
             # Try to find JSON block
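
With database.db_path set so the database is persisted to disk, the logged prompts can be read back from the per-program JSON files; a minimal sketch (the programs/ layout follows _save_program above, while the concrete db_path value is illustrative):

import glob
import json
import os

db_path = "openevolve_output/db"  # illustrative; use whatever database.db_path points to
for path in glob.glob(os.path.join(db_path, "programs", "*.json")):
    with open(path) as f:
        record = json.load(f)
    for template_key, entry in record.get("prompts", {}).items():
        print(path, template_key, "responses:", len(entry.get("responses", [])))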

0 commit comments
