Skip to content

Commit 37afa09

Browse files
committed
Merge latest main into parallel-iterations fork
1 parent 8312b0f commit 37afa09

File tree

2 files changed

+218
-3
lines changed

2 files changed

+218
-3
lines changed

openevolve/database.py

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,6 +1346,201 @@ def _load_artifact_dir(self, artifact_dir: str) -> Dict[str, Union[str, bytes]]:
13461346

13471347
return artifacts
13481348

1349+
def log_prompt(
    self,
    program_id: str,
    template_key: str,
    prompt: Dict[str, str],
    responses: Optional[List[str]] = None,
) -> None:
    """
    Log a prompt for a program.
    Only logs if self.config.log_prompts is True.

    The record is stored at ``self.prompts_by_program[program_id][template_key]``
    and replaces any record previously logged under the same pair of keys.

    Args:
        program_id: ID of the program to log the prompt for
        template_key: Key for the prompt template
        prompt: The prompt parts for this template, e.g.
            ``{'system': str, 'user': str}``. The dictionary is copied; the
            caller's object is left untouched.
        responses: Optional list of responses to the prompt, if available.
            Stored under the ``"responses"`` key of the record (empty list
            when omitted).
    """
    if not self.config.log_prompts:
        return

    # Build a fresh record rather than writing "responses" into the caller's
    # dict: the same prompt object may still be in use by the caller after
    # logging, and must not grow an extra key as a side effect.
    record = dict(prompt)
    record["responses"] = list(responses) if responses is not None else []

    if self.prompts_by_program is None:
        self.prompts_by_program = {}

    self.prompts_by_program.setdefault(program_id, {})[template_key] = record
1380+
1381+
# Artifact storage and retrieval methods
1382+
1383+
def store_artifacts(self, program_id: str, artifacts: Dict[str, Union[str, bytes]]) -> None:
    """
    Persist the artifacts of a program.

    Artifacts whose size is at most the configured threshold
    (``config.artifact_size_threshold``, 32KB when the attribute is absent)
    are serialized together into ``program.artifacts_json``. Larger ones are
    written as individual files into a per-program directory, recorded in
    ``program.artifact_dir``.

    Nothing is stored when ``artifacts`` is empty, when the program is
    unknown, or when the ``ENABLE_ARTIFACTS`` environment variable is set to
    anything other than "true" (case-insensitive).

    Args:
        program_id: ID of the program
        artifacts: Dictionary of artifact name to content
    """
    if not artifacts:
        return

    program = self.get(program_id)
    if not program:
        logger.warning(f"Cannot store artifacts: program {program_id} not found")
        return

    # Environment switch: artifacts are on unless explicitly turned off
    if os.environ.get("ENABLE_ARTIFACTS", "true").lower() != "true":
        logger.debug("Artifacts disabled, skipping storage")
        return

    # Partition by size: inline (JSON) vs. on-disk
    limit = getattr(self.config, "artifact_size_threshold", 32 * 1024)  # 32KB default
    inline = {}
    on_disk = {}
    for name, content in artifacts.items():
        bucket = inline if self._get_artifact_size(content) <= limit else on_disk
        bucket[name] = content

    # Small artifacts travel with the program as a JSON string
    if inline:
        program.artifacts_json = json.dumps(inline, default=self._artifact_serializer)
        logger.debug(f"Stored {len(inline)} small artifacts for program {program_id}")

    # Large artifacts go to files under the program's artifact directory
    if on_disk:
        target_dir = self._create_artifact_dir(program_id)
        program.artifact_dir = target_dir
        for name, content in on_disk.items():
            self._write_artifact_file(target_dir, name, content)
        logger.debug(f"Stored {len(on_disk)} large artifacts for program {program_id}")
1429+
1430+
def get_artifacts(self, program_id: str) -> Dict[str, Union[str, bytes]]:
    """
    Retrieve all artifacts for a program

    Small artifacts are read from ``program.artifacts_json``; large ones are
    read from ``program.artifact_dir`` when that directory exists. If the
    same name appears in both places, the on-disk content wins because it is
    merged last.

    Args:
        program_id: ID of the program

    Returns:
        Dictionary of artifact name to content. Empty when the program is
        unknown or has no artifacts.
    """
    program = self.get(program_id)
    if not program:
        return {}

    artifacts = {}

    # Load small artifacts from JSON
    if program.artifacts_json:
        try:
            small_artifacts = json.loads(program.artifacts_json)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to decode artifacts JSON for program {program_id}: {e}")
        else:
            # Binary artifacts are serialized as {"__bytes__": <base64>} when
            # stored; turn those markers back into bytes so callers get the
            # same type they stored. Decoding is applied per value (not as a
            # json object_hook) so that the top-level name -> content mapping
            # is never itself mistaken for a marker.
            for name, content in small_artifacts.items():
                if isinstance(content, dict):
                    content = self._artifact_deserializer(content)
                artifacts[name] = content

    # Load large artifacts from disk
    if program.artifact_dir and os.path.exists(program.artifact_dir):
        disk_artifacts = self._load_artifact_dir(program.artifact_dir)
        artifacts.update(disk_artifacts)

    return artifacts
1460+
1461+
def _get_artifact_size(self, value: Union[str, bytes]) -> int:
    """Return the size of an artifact value in bytes.

    Bytes are measured directly; strings by their UTF-8 encoding; any other
    object by the UTF-8 encoding of its ``str()`` form.
    """
    if isinstance(value, bytes):
        return len(value)
    text = value if isinstance(value, str) else str(value)
    return len(text.encode("utf-8"))
1469+
1470+
def _artifact_serializer(self, obj):
    """``json.dumps`` fallback that encodes bytes as a base64 marker dict.

    Bytes become ``{"__bytes__": <base64 text>}``; every other type is
    rejected with ``TypeError``.
    """
    if not isinstance(obj, bytes):
        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
    encoded = base64.b64encode(obj)
    return {"__bytes__": encoded.decode("utf-8")}
1475+
1476+
def _artifact_deserializer(self, dct):
    """JSON deserializer for artifacts that handles bytes

    Reverses the ``{"__bytes__": <base64 text>}`` marker used when bytes are
    serialized to JSON.

    Args:
        dct: A decoded JSON object (suitable for use as a ``json.loads``
            ``object_hook``).

    Returns:
        The decoded bytes when ``dct`` is exactly a bytes marker; otherwise
        ``dct`` unchanged.
    """
    # Only the exact marker shape is decoded. A dict that merely happens to
    # contain a "__bytes__" key next to other keys is ordinary data, and
    # decoding it would silently discard those other keys.
    if isinstance(dct, dict) and len(dct) == 1:
        payload = dct.get("__bytes__")
        if isinstance(payload, str):
            return base64.b64decode(payload)
    return dct
1481+
1482+
def _create_artifact_dir(self, program_id: str) -> str:
    """Create (if needed) and return the artifact directory of a program.

    The base directory is ``config.artifacts_base_path`` when set; otherwise
    ``<config.db_path>/artifacts`` when a database path is configured, and
    ``./artifacts`` as the last resort. The program's directory is the base
    directory joined with ``program_id``.
    """
    root = getattr(self.config, "artifacts_base_path", None)
    if not root:
        db_path = self.config.db_path
        root = os.path.join(db_path, "artifacts") if db_path else "./artifacts"

    target = os.path.join(root, program_id)
    os.makedirs(target, exist_ok=True)
    return target
1495+
1496+
def _write_artifact_file(self, artifact_dir: str, key: str, value: Union[str, bytes]) -> None:
    """Write an artifact to a file

    The file name is ``key`` reduced to alphanumerics and ``._-``. Bytes are
    written verbatim; strings (and the ``str()`` form of any other object)
    are written as UTF-8 text. Failures are logged as warnings and not
    raised, so one bad artifact does not abort storing the others.

    Args:
        artifact_dir: Directory the file is created in
        key: Artifact name, used to derive the file name
        value: Artifact content
    """
    # Sanitize filename
    safe_key = "".join(c for c in key if c.isalnum() or c in "._-")
    # A name made only of dots (notably "." and "..") refers to a directory,
    # not a file, so opening it would fail and the artifact would be lost.
    # Treat it like an empty name.
    if not safe_key.strip("."):
        safe_key = "artifact"

    file_path = os.path.join(artifact_dir, safe_key)

    try:
        if isinstance(value, bytes):
            with open(file_path, "wb") as f:
                f.write(value)
        else:
            # Anything that is not already text is stored via its str() form
            text = value if isinstance(value, str) else str(value)
            # newline="" disables newline translation on write, so the bytes
            # on disk are exactly the UTF-8 encoding of the text on every
            # platform.
            with open(file_path, "w", encoding="utf-8", newline="") as f:
                f.write(text)
    except Exception as e:
        # Deliberately best-effort: report and continue
        logger.warning(f"Failed to write artifact {key} to {file_path}: {e}")
1518+
1519+
def _load_artifact_dir(self, artifact_dir: str) -> Dict[str, Union[str, bytes]]:
    """Read every regular file of a directory into a name -> content mapping.

    Each file is first read as UTF-8 text; if it does not decode, it is
    read again as raw bytes. Entries that are not regular files are skipped.
    Read and listing failures are logged as warnings, and whatever was
    loaded up to that point is returned.
    """
    loaded = {}

    try:
        for entry in os.listdir(artifact_dir):
            full_path = os.path.join(artifact_dir, entry)
            if not os.path.isfile(full_path):
                continue
            try:
                # Text is the preferred representation
                with open(full_path, "r", encoding="utf-8") as handle:
                    loaded[entry] = handle.read()
            except UnicodeDecodeError:
                # Not valid UTF-8: fall back to the raw bytes
                with open(full_path, "rb") as handle:
                    loaded[entry] = handle.read()
            except Exception as e:
                logger.warning(f"Failed to read artifact file {full_path}: {e}")
    except Exception as e:
        logger.warning(f"Failed to list artifact directory {artifact_dir}: {e}")

    return loaded
1543+
13491544
def log_prompt(
13501545
self,
13511546
program_id: str,

openevolve/iteration.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ class Result:
2626
parent: str = None
2727
child_metrics: str = None
2828
iteration_time: float = None
29+
prompt: str = None
30+
llm_response: str = None
31+
artifacts: dict = None
32+
2933

3034

3135
def run_iteration_sync(iteration: int, config: Config, evaluation_file: str, database_path: str):
@@ -59,21 +63,32 @@ def run_iteration_sync(iteration: int, config: Config, evaluation_file: str, dat
5963
evaluation_file,
6064
llm_evaluator_ensemble,
6165
evaluator_prompt_sampler,
66+
database=database,
6267
)
6368

6469
# Sample parent and inspirations from current island
6570
parent, inspirations = database.sample()
6671

72+
# Get artifacts for the parent program if available
73+
parent_artifacts = database.get_artifacts(parent.id)
74+
75+
# Get actual top programs for prompt context (separate from inspirations)
76+
# This ensures the LLM sees only high-performing programs as examples
77+
actual_top_programs = database.get_top_programs(5)
78+
79+
6780
# Build prompt
6881
prompt = prompt_sampler.build_prompt(
6982
current_program=parent.code,
7083
parent_program=parent.code, # We don't have the parent's code, use the same
7184
program_metrics=parent.metrics,
7285
previous_programs=[p.to_dict() for p in database.get_top_programs(3)],
73-
top_programs=[p.to_dict() for p in inspirations],
86+
top_programs=[p.to_dict() for p in actual_top_programs],
87+
inspirations=[p.to_dict() for p in inspirations],
7488
language=config.language,
7589
evolution_round=iteration,
76-
allow_full_rewrite=config.allow_full_rewrites,
90+
diff_based_evolution=config.diff_based_evolution,
91+
program_artifacts=parent_artifacts if parent_artifacts else None,
7792
)
7893

7994
async def _run():
@@ -120,7 +135,8 @@ async def _run():
120135
# Evaluate the child program
121136
child_id = str(uuid.uuid4())
122137
result.child_metrics = await evaluator.evaluate_program(child_code, child_id)
123-
138+
# Handle artifacts if they exist
139+
artifacts = evaluator.get_pending_artifacts(child_id)
124140
# Create a child program
125141
result.child_program = Program(
126142
id=child_id,
@@ -134,6 +150,10 @@ async def _run():
134150
"parent_metrics": parent.metrics,
135151
},
136152
)
153+
result.prompt = prompt
154+
result.llm_response = llm_response
155+
# Store artifacts in the result so they can be saved later
156+
result.artifacts = artifacts
137157

138158
except Exception as e:
139159
logger.exception("Error in PID %s:", os.getpid())

0 commit comments

Comments
 (0)