Skip to content

Commit 16e7dc9

Browse files
committed
Fixed large artifact store/load issue
1 parent 37afa09 commit 16e7dc9

File tree

1 file changed

+4
-198
lines changed

1 file changed

+4
-198
lines changed

openevolve/database.py

Lines changed: 4 additions & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,10 @@ def save(self, path: Optional[str] = None, iteration: int = 0) -> None:
359359
try:
360360
with FileLock(lock_path, timeout=10):
361361
# Create directory and remove old path if it exists
362-
if os.path.exists(save_path):
363-
shutil.rmtree(save_path)
364-
os.makedirs(save_path)
362+
# if os.path.exists(save_path):
363+
# shutil.rmtree(save_path)
364+
# create directory if it doesn't exist
365+
os.makedirs(save_path, exist_ok=True)
365366

366367
# Save each program
367368
for program in self.programs.values():
@@ -1377,198 +1378,3 @@ def log_prompt(
13771378
if program_id not in self.prompts_by_program:
13781379
self.prompts_by_program[program_id] = {}
13791380
self.prompts_by_program[program_id][template_key] = prompt
1380-
1381-
# Artifact storage and retrieval methods
1382-
1383-
def store_artifacts(self, program_id: str, artifacts: Dict[str, Union[str, bytes]]) -> None:
    """
    Attach artifacts to a program, inline or on disk depending on size

    Artifacts whose size is at most ``config.artifact_size_threshold``
    (32KB when the config has no such attribute) are serialized together
    into ``program.artifacts_json``. Larger ones are written as files in a
    per-program artifact directory, recorded in ``program.artifact_dir``.

    Nothing is stored when ``artifacts`` is empty, when the program cannot
    be found, or when the ``ENABLE_ARTIFACTS`` environment variable is set
    to anything other than "true" (case-insensitive).

    Args:
        program_id: ID of the program
        artifacts: Dictionary of artifact name to content
    """
    if not artifacts:
        return

    program = self.get(program_id)
    if not program:
        logger.warning(f"Cannot store artifacts: program {program_id} not found")
        return

    # Artifact storage can be switched off through the environment
    if os.environ.get("ENABLE_ARTIFACTS", "true").lower() != "true":
        logger.debug("Artifacts disabled, skipping storage")
        return

    # Route every artifact into the inline or the on-disk bucket by size
    threshold = getattr(self.config, "artifact_size_threshold", 32 * 1024)  # 32KB default
    inline = {}
    on_disk = {}
    for name, payload in artifacts.items():
        bucket = inline if self._get_artifact_size(payload) <= threshold else on_disk
        bucket[name] = payload

    # Inline artifacts travel with the program as a JSON string
    if inline:
        program.artifacts_json = json.dumps(inline, default=self._artifact_serializer)
        logger.debug(f"Stored {len(inline)} small artifacts for program {program_id}")

    # Oversized artifacts become individual files
    if on_disk:
        target_dir = self._create_artifact_dir(program_id)
        program.artifact_dir = target_dir
        for name, payload in on_disk.items():
            self._write_artifact_file(target_dir, name, payload)
        logger.debug(f"Stored {len(on_disk)} large artifacts for program {program_id}")
1429-
1430-
def get_artifacts(self, program_id: str) -> Dict[str, Union[str, bytes]]:
    """
    Retrieve all artifacts for a program

    Small artifacts are decoded from the program's ``artifacts_json`` string
    and large artifacts are read from ``artifact_dir`` when that directory
    exists. Entries loaded from disk replace JSON entries with the same name.

    Args:
        program_id: ID of the program

    Returns:
        Dictionary of artifact name to content; empty if the program is not found
    """
    program = self.get(program_id)
    if not program:
        return {}

    artifacts = {}

    # Load small artifacts from JSON
    if program.artifacts_json:
        try:
            # object_hook reverses _artifact_serializer, so values stored as
            # {"__bytes__": <base64>} are returned as bytes again
            small_artifacts = json.loads(
                program.artifacts_json, object_hook=self._artifact_deserializer
            )
            artifacts.update(small_artifacts)
        except (ValueError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and malformed base64;
            # TypeError covers a payload that is not a JSON object
            logger.warning(f"Failed to decode artifacts JSON for program {program_id}: {e}")

    # Load large artifacts from disk
    if program.artifact_dir and os.path.exists(program.artifact_dir):
        disk_artifacts = self._load_artifact_dir(program.artifact_dir)
        artifacts.update(disk_artifacts)

    return artifacts
1460-
1461-
def _get_artifact_size(self, value: Union[str, bytes]) -> int:
1462-
"""Get size of an artifact value in bytes"""
1463-
if isinstance(value, str):
1464-
return len(value.encode("utf-8"))
1465-
elif isinstance(value, bytes):
1466-
return len(value)
1467-
else:
1468-
return len(str(value).encode("utf-8"))
1469-
1470-
def _artifact_serializer(self, obj):
1471-
"""JSON serializer for artifacts that handles bytes"""
1472-
if isinstance(obj, bytes):
1473-
return {"__bytes__": base64.b64encode(obj).decode("utf-8")}
1474-
raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
1475-
1476-
def _artifact_deserializer(self, dct):
1477-
"""JSON deserializer for artifacts that handles bytes"""
1478-
if "__bytes__" in dct:
1479-
return base64.b64decode(dct["__bytes__"])
1480-
return dct
1481-
1482-
def _create_artifact_dir(self, program_id: str) -> str:
1483-
"""Create artifact directory for a program"""
1484-
base_path = getattr(self.config, "artifacts_base_path", None)
1485-
if not base_path:
1486-
base_path = (
1487-
os.path.join(self.config.db_path or ".", "artifacts")
1488-
if self.config.db_path
1489-
else "./artifacts"
1490-
)
1491-
1492-
artifact_dir = os.path.join(base_path, program_id)
1493-
os.makedirs(artifact_dir, exist_ok=True)
1494-
return artifact_dir
1495-
1496-
def _write_artifact_file(self, artifact_dir: str, key: str, value: Union[str, bytes]) -> None:
1497-
"""Write an artifact to a file"""
1498-
# Sanitize filename
1499-
safe_key = "".join(c for c in key if c.isalnum() or c in "._-")
1500-
if not safe_key:
1501-
safe_key = "artifact"
1502-
1503-
file_path = os.path.join(artifact_dir, safe_key)
1504-
1505-
try:
1506-
if isinstance(value, str):
1507-
with open(file_path, "w", encoding="utf-8") as f:
1508-
f.write(value)
1509-
elif isinstance(value, bytes):
1510-
with open(file_path, "wb") as f:
1511-
f.write(value)
1512-
else:
1513-
# Convert to string and write
1514-
with open(file_path, "w", encoding="utf-8") as f:
1515-
f.write(str(value))
1516-
except Exception as e:
1517-
logger.warning(f"Failed to write artifact {key} to {file_path}: {e}")
1518-
1519-
def _load_artifact_dir(self, artifact_dir: str) -> Dict[str, Union[str, bytes]]:
1520-
"""Load artifacts from a directory"""
1521-
artifacts = {}
1522-
1523-
try:
1524-
for filename in os.listdir(artifact_dir):
1525-
file_path = os.path.join(artifact_dir, filename)
1526-
if os.path.isfile(file_path):
1527-
try:
1528-
# Try to read as text first
1529-
with open(file_path, "r", encoding="utf-8") as f:
1530-
content = f.read()
1531-
artifacts[filename] = content
1532-
except UnicodeDecodeError:
1533-
# If text fails, read as binary
1534-
with open(file_path, "rb") as f:
1535-
content = f.read()
1536-
artifacts[filename] = content
1537-
except Exception as e:
1538-
logger.warning(f"Failed to read artifact file {file_path}: {e}")
1539-
except Exception as e:
1540-
logger.warning(f"Failed to list artifact directory {artifact_dir}: {e}")
1541-
1542-
return artifacts
1543-
1544-
def log_prompt(
    self,
    program_id: str,
    template_key: str,
    prompt: Dict[str, str],
    responses: Optional[List[str]] = None,
) -> None:
    """
    Log a prompt for a program.
    Only logs if self.config.log_prompts is True.

    The prompt is stored as a shallow copy with an added "responses" entry,
    so the dictionary passed in by the caller is left unmodified. An entry
    already logged under the same program and template key is replaced.

    Args:
        program_id: ID of the program to log the prompt for
        template_key: Key for the prompt template
        prompt: Prompts in the format {template_key: { 'system': str, 'user': str }}.
        responses: Optional list of responses to the prompt, if available.
    """

    if not self.config.log_prompts:
        return

    if responses is None:
        responses = []

    # Copy before adding the responses so the caller's dict is not mutated
    entry = dict(prompt)
    entry["responses"] = responses

    if self.prompts_by_program is None:
        self.prompts_by_program = {}

    self.prompts_by_program.setdefault(program_id, {})[template_key] = entry

0 commit comments

Comments
 (0)