Commit 549b39c

_llm_evaluate returns artifacts if LLM returned string responses
1 parent: c779ac9

1 file changed: openevolve/evaluator.py (32 additions, 11 deletions)
@@ -125,16 +125,32 @@ async def evaluate_program(
         eval_result = self._process_evaluation_result(result)
 
         # Add LLM feedback if configured
+        llm_eval_result = None
         if self.config.use_llm_feedback and self.llm_ensemble:
-            feedback_metrics = await self._llm_evaluate(program_code)
+            llm_result = await self._llm_evaluate(program_code)
+            llm_eval_result = self._process_evaluation_result(llm_result)
 
             # Combine metrics
-            for name, value in feedback_metrics.items():
+            for name, value in llm_result.metrics.items():
                 eval_result.metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
 
         # Store artifacts if enabled and present
-        if artifacts_enabled and eval_result.has_artifacts() and program_id:
-            self._pending_artifacts[program_id] = eval_result.artifacts
+        if (
+            artifacts_enabled
+            and (
+                eval_result.has_artifacts()
+                or (llm_eval_result and llm_eval_result.has_artifacts())
+            )
+            and program_id
+        ):
+            self._pending_artifacts[program_id] = {}
+
+            # Merge eval_result artifacts with llm artifacts if they exist
+            if eval_result.has_artifacts():
+                self._pending_artifacts[program_id].update(eval_result.artifacts)
+
+            if llm_eval_result and llm_eval_result.has_artifacts():
+                self._pending_artifacts[program_id].update(llm_eval_result.artifacts)
 
         elapsed = time.time() - start_time
         logger.info(
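
In other words, the new condition stores one merged artifact dict per program, combining executor artifacts with any LLM artifacts. A minimal standalone sketch of that merge, assuming a simple stand-in for openevolve's EvaluationResult (metrics plus artifacts dicts with a has_artifacts() helper; the names and values below are illustrative, not the project's actual definitions):

    # Illustrative stand-in for openevolve's EvaluationResult; the real class
    # lives elsewhere in the project, this just mirrors the shape used above.
    from dataclasses import dataclass, field
    from typing import Any, Dict

    @dataclass
    class EvaluationResult:
        metrics: Dict[str, float] = field(default_factory=dict)
        artifacts: Dict[str, Any] = field(default_factory=dict)

        def has_artifacts(self) -> bool:
            return bool(self.artifacts)

    eval_result = EvaluationResult(metrics={"score": 0.9}, artifacts={"stderr": ""})
    llm_eval_result = EvaluationResult(artifacts={"review": "Consider tightening the loop bound."})

    pending_artifacts: Dict[str, Dict[str, Any]] = {}
    program_id = "prog-123"  # hypothetical program id

    if eval_result.has_artifacts() or (llm_eval_result and llm_eval_result.has_artifacts()):
        pending_artifacts[program_id] = {}
        if eval_result.has_artifacts():
            pending_artifacts[program_id].update(eval_result.artifacts)
        if llm_eval_result and llm_eval_result.has_artifacts():
            pending_artifacts[program_id].update(llm_eval_result.artifacts)
    # pending_artifacts["prog-123"] now holds both executor and LLM artifacts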
@@ -396,6 +412,7 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:
             json_pattern = r"```json\n(.*?)\n```"
             import re
 
+            artifacts = {}
             avg_metrics = {}
             for i, response in enumerate(responses):
                 json_match = re.search(json_pattern, response, re.DOTALL)
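
For context, json_pattern extracts the fenced JSON block from each model response before parsing. A small self-contained check of that extraction, using a made-up response string:

    import json
    import re

    json_pattern = r"```json\n(.*?)\n```"  # same pattern as in the hunk above

    # Made-up model response wrapping its scores in a fenced JSON block
    response = 'Here is my review.\n```json\n{"readability": 0.8}\n```'

    json_match = re.search(json_pattern, response, re.DOTALL)
    if json_match:
        result = json.loads(json_match.group(1))  # {'readability': 0.8}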
@@ -414,12 +431,13 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:
                 # Parse JSON
                 result = json.loads(json_str)
 
-                # Filter all non-numeric values
-                metrics = {
-                    name: float(value)
-                    for name, value in result.items()
-                    if isinstance(value, (int, float))
-                }
+                # All non-numeric values are artifacts, all numeric values are metrics
+                metrics = {}
+                for key, value in result.items():
+                    if not isinstance(value, (int, float)):
+                        artifacts[key] = value
+                    else:
+                        metrics[key] = float(value)
 
                 # Weight of the model in the ensemble
                 weight = self.llm_ensemble.weights[i] if self.llm_ensemble.weights else 1.0
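
This loop is what lets string-valued fields survive as artifacts instead of being silently dropped. A sketch of the split on a hypothetical parsed response:

    # Hypothetical parsed LLM response mixing numeric scores and free-text feedback
    result = {
        "readability": 0.8,
        "maintainability": 0.7,
        "improvement_suggestions": "Consider extracting the parsing loop into a helper.",
    }

    artifacts = {}
    metrics = {}
    for key, value in result.items():
        if not isinstance(value, (int, float)):
            artifacts[key] = value
        else:
            metrics[key] = float(value)
    # metrics   -> {"readability": 0.8, "maintainability": 0.7}
    # artifacts -> {"improvement_suggestions": "Consider extracting ..."}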
@@ -431,7 +449,10 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]:
                     else:
                         avg_metrics[name] = value * weight
 
-            return avg_metrics
+            return EvaluationResult(
+                metrics=avg_metrics,
+                artifacts=artifacts,
+            )
 
         except Exception as e:
             logger.warning(f"Error parsing LLM response: {str(e)}")
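
With this change, _llm_evaluate hands back an EvaluationResult carrying both the weighted-average metrics and the collected artifacts, rather than a bare avg_metrics dict. A rough sketch of the weighted averaging feeding that return value; the accumulation branch for already-seen metric names is not visible in the hunk, so its reconstruction here is an assumption, and the weights and scores are made up:

    # Hypothetical two-model ensemble; weights and metric values are illustrative
    weights = [0.7, 0.3]
    per_model_metrics = [
        {"readability": 0.8, "correctness": 1.0},
        {"readability": 0.6, "correctness": 1.0},
    ]

    avg_metrics = {}
    for i, metrics in enumerate(per_model_metrics):
        weight = weights[i] if weights else 1.0
        for name, value in metrics.items():
            if name in avg_metrics:
                avg_metrics[name] += value * weight  # assumed accumulation branch
            else:
                avg_metrics[name] = value * weight
    # avg_metrics ≈ {"readability": 0.74, "correctness": 1.0}
    # The method then wraps this as EvaluationResult(metrics=avg_metrics, artifacts=artifacts)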
