
Commit 11f7602

add judgeConfigKey
1 parent 86acd6e commit 11f7602

3 files changed: +20 -11 lines changed

ldai/chat/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -112,7 +112,8 @@ async def evaluate_judge(judge_config):
             )
 
             if eval_result and eval_result.success:
-                self._tracker.track_eval_scores(eval_result.evals)
+                eval_result.judge_config_key = judge_config.key
+                self._tracker.track_judge_response(eval_result)
 
             return eval_result
 
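Note: the new lines tag the judge's result with the config key that produced it, then hand the whole JudgeResponse to the tracker instead of tracking raw eval scores. A minimal sketch of the resulting call path, assuming eval_result is the JudgeResponse from ldai/providers/types.py, judge_config exposes a .key attribute, and EvalScore accepts score/reasoning keywords (those constructor details are assumptions, not part of this diff):

    # Hypothetical illustration; only judge_config_key and track_judge_response come from this commit.
    eval_result = JudgeResponse(
        evals={'relevance': EvalScore(score=0.9, reasoning='answer stays on topic')},  # assumed EvalScore signature
        success=True,
    )
    eval_result.judge_config_key = judge_config.key   # e.g. 'support-judge' (illustrative value)
    tracker.track_judge_response(eval_result)         # emits one track event per eval metric (see ldai/tracker.py below)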

ldai/providers/types.py

Lines changed: 3 additions & 0 deletions
@@ -74,6 +74,7 @@ class JudgeResponse:
     """
     Response from a judge evaluation containing scores and reasoning for multiple metrics.
     """
+    judge_config_key: Optional[str] = None  # The key of the judge configuration that was used to generate this response
     evals: Dict[str, EvalScore]  # Dictionary where keys are metric names and values contain score and reasoning
     success: bool  # Whether the evaluation completed successfully
     error: Optional[str] = None  # Error message if evaluation failed
@@ -86,6 +87,8 @@ def to_dict(self) -> Dict[str, Any]:
             'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()},
             'success': self.success,
         }
+        if self.judge_config_key is not None:
+            result['judgeConfigKey'] = self.judge_config_key
         if self.error is not None:
             result['error'] = self.error
         return result
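Note: serialization keeps the attribute snake_case but emits the camelCase judgeConfigKey, and only when the field is set. A rough sketch of the to_dict() output after this change, assuming EvalScore.to_dict() returns its score and reasoning (an assumption; only the 'judgeConfigKey' key comes from this hunk):

    {
        'evals': {'relevance': {'score': 0.9, 'reasoning': 'answer stays on topic'}},
        'success': True,
        'judgeConfigKey': 'support-judge',   # omitted entirely when judge_config_key is None
    }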

ldai/tracker.py

Lines changed: 15 additions & 10 deletions
@@ -224,22 +224,27 @@ def track_eval_scores(self, scores: Dict[str, Any]) -> None:
 
     def track_judge_response(self, judge_response: Any) -> None:
         """
-        Track a judge response, including evaluation scores and success status.
+        Track a judge response, including evaluation scores with judge config key.
 
         :param judge_response: JudgeResponse object containing evals and success status
         """
-        from ldai.providers.types import JudgeResponse
+        from ldai.providers.types import JudgeResponse, EvalScore
 
         if isinstance(judge_response, JudgeResponse):
-            # Track evaluation scores
+            # Track evaluation scores with judge config key included in metadata
             if judge_response.evals:
-                self.track_eval_scores(judge_response.evals)
-
-            # Track success/error based on judge response
-            if judge_response.success:
-                self.track_success()
-            else:
-                self.track_error()
+                track_data = self.__get_track_data()
+                if judge_response.judge_config_key:
+                    track_data = {**track_data, 'judgeConfigKey': judge_response.judge_config_key}
+
+                for metric_key, eval_score in judge_response.evals.items():
+                    if isinstance(eval_score, EvalScore):
+                        self._ld_client.track(
+                            metric_key,
+                            self._context,
+                            track_data,
+                            eval_score.score
+                        )
 
     def track_feedback(self, feedback: Dict[str, FeedbackKind]) -> None:
         """
