Commit 5845e2f

Add weighted LLM average to evaluation metrics
Calculates and stores a weighted average of LLM metrics in eval_result.metrics. Updates combined_score to blend accuracy and LLM quality, improving overall evaluation granularity.
1 parent bc66c5b commit 5845e2f

File tree: 1 file changed (+16 −1 lines)

openevolve/evaluator.py

Lines changed: 16 additions & 1 deletion
@@ -189,8 +189,23 @@ async def evaluate_program(
             llm_eval_result = self._process_evaluation_result(llm_result)
 
             # Combine metrics
+            llm_scores = []
             for name, value in llm_result.metrics.items():
-                eval_result.metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight
+                weighted_value = value * self.config.llm_feedback_weight
+                eval_result.metrics[f"llm_{name}"] = weighted_value
+                llm_scores.append(value)  # Use unweighted value for average
+
+            # Add average of LLM metrics
+            if llm_scores:
+                llm_average = sum(llm_scores) / len(llm_scores)
+                eval_result.metrics["llm_average"] = llm_average * self.config.llm_feedback_weight
+
+                # Recalculate combined_score if it exists
+                if "combined_score" in eval_result.metrics:
+                    # Original combined_score is just accuracy
+                    accuracy = eval_result.metrics["combined_score"]
+                    # Combine with LLM average (70% accuracy, 30% LLM quality)
+                    eval_result.metrics["combined_score"] = accuracy * 0.7 + llm_average * 0.3
 
             # Store artifacts if enabled and present
             if (
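For illustration, here is a minimal standalone sketch of the metric math this commit introduces. The weight value and the sample metric names and scores below are hypothetical placeholders, not values from the repository; only the arithmetic mirrors the diff.

# Sketch of the weighted-average and blended-score logic (illustrative values only)
llm_feedback_weight = 0.1  # hypothetical stand-in for config.llm_feedback_weight

# Hypothetical metrics returned by the LLM evaluator
llm_metrics = {"readability": 0.8, "maintainability": 0.6, "efficiency": 0.7}

# Hypothetical primary evaluation result; combined_score starts out as accuracy
eval_metrics = {"combined_score": 0.9}

llm_scores = []
for name, value in llm_metrics.items():
    eval_metrics[f"llm_{name}"] = value * llm_feedback_weight
    llm_scores.append(value)  # unweighted values feed the average

if llm_scores:
    llm_average = sum(llm_scores) / len(llm_scores)  # (0.8 + 0.6 + 0.7) / 3 = 0.7
    eval_metrics["llm_average"] = llm_average * llm_feedback_weight

    if "combined_score" in eval_metrics:
        accuracy = eval_metrics["combined_score"]
        # 70% accuracy, 30% LLM quality: 0.9 * 0.7 + 0.7 * 0.3 = 0.84
        eval_metrics["combined_score"] = accuracy * 0.7 + llm_average * 0.3

print(eval_metrics)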
