
Commit 125bb66

judge response should be async
1 parent c3c939f

2 files changed: +52 -18 lines changed


ldai/chat/__init__.py

Lines changed: 16 additions & 18 deletions
@@ -1,5 +1,6 @@
 """TrackedChat implementation for managing AI chat conversations."""
 
+import asyncio
 from typing import Any, Dict, List, Optional
 
 from ldai.models import AICompletionConfig, LDMessage
@@ -65,39 +66,39 @@ async def invoke(self, prompt: str) -> ChatResponse:
             lambda: self._provider.invoke_model(all_messages),
         )
 
-        # Evaluate with judges if configured
+        # Start judge evaluations as async tasks (don't await them)
         if (
             self._ai_config.judge_configuration
             and self._ai_config.judge_configuration.judges
             and len(self._ai_config.judge_configuration.judges) > 0
         ):
-            evaluations = await self._evaluate_with_judges(self._messages, response)
-            response.evaluations = evaluations
+            evaluation_tasks = self._start_judge_evaluations(self._messages, response)
+            response.evaluations = evaluation_tasks
 
         # Add the response message to conversation history
         self._messages.append(response.message)
         return response
 
-    async def _evaluate_with_judges(
+    def _start_judge_evaluations(
         self,
         messages: List[LDMessage],
        response: ChatResponse,
-    ) -> List[Optional[JudgeResponse]]:
+    ) -> List[asyncio.Task[Optional[JudgeResponse]]]:
         """
-        Evaluates the response with all configured judges.
+        Start judge evaluations as async tasks without awaiting them.
 
-        Returns a list of evaluation results.
+        Returns a list of async tasks that can be awaited later.
 
         :param messages: Array of messages representing the conversation history
         :param response: The AI response to be evaluated
-        :return: List of judge evaluation results (may contain None for failed evaluations)
+        :return: List of async tasks that will return judge evaluation results
         """
         if not self._ai_config.judge_configuration or not self._ai_config.judge_configuration.judges:
             return []
 
         judge_configs = self._ai_config.judge_configuration.judges
 
-        # Start all judge evaluations in parallel
+        # Start all judge evaluations as tasks
         async def evaluate_judge(judge_config):
             judge = self._judges.get(judge_config.key)
             if not judge:
@@ -116,16 +117,13 @@ async def evaluate_judge(judge_config):
 
             return eval_result
 
-        # Ensure all evaluations complete even if some fail
-        import asyncio
-        evaluation_promises = [evaluate_judge(judge_config) for judge_config in judge_configs]
-        results = await asyncio.gather(*evaluation_promises, return_exceptions=True)
-
-        # Map exceptions to None
-        return [
-            None if isinstance(result, Exception) else result
-            for result in results
+        # Create tasks for each judge evaluation
+        tasks = [
+            asyncio.create_task(evaluate_judge(judge_config))
+            for judge_config in judge_configs
         ]
+
+        return tasks
 
     def get_config(self) -> AICompletionConfig:
         """

ldai/providers/types.py

Lines changed: 36 additions & 0 deletions
@@ -15,6 +15,21 @@ class LDAIMetrics:
     success: bool
     usage: Optional[TokenUsage] = None
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Render the metrics as a dictionary object.
+        """
+        result: Dict[str, Any] = {
+            'success': self.success,
+        }
+        if self.usage is not None:
+            result['usage'] = {
+                'total': self.usage.total,
+                'input': self.usage.input,
+                'output': self.usage.output,
+            }
+        return result
+
 
 @dataclass
 class ChatResponse:
@@ -44,6 +59,15 @@ class EvalScore:
     score: float  # Score between 0.0 and 1.0
     reasoning: str  # Reasoning behind the provided score
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Render the evaluation score as a dictionary object.
+        """
+        return {
+            'score': self.score,
+            'reasoning': self.reasoning,
+        }
+
 
 @dataclass
 class JudgeResponse:
@@ -54,3 +78,15 @@ class JudgeResponse:
     success: bool  # Whether the evaluation completed successfully
     error: Optional[str] = None  # Error message if evaluation failed
 
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Render the judge response as a dictionary object.
+        """
+        result: Dict[str, Any] = {
+            'evals': {key: eval_score.to_dict() for key, eval_score in self.evals.items()},
+            'success': self.success,
+        }
+        if self.error is not None:
+            result['error'] = self.error
+        return result
+
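The new to_dict() helpers make the evaluation types straightforward to serialize, for example when logging an awaited judge result as JSON. A small usage sketch, assuming judge_response is a JudgeResponse obtained from one of the evaluation tasks above; the judge_response_to_json helper name is hypothetical:

import json

def judge_response_to_json(judge_response) -> str:
    # JudgeResponse.to_dict() converts nested EvalScore values via their own
    # to_dict(), so json.dumps only sees plain dicts, strings, bools and floats.
    return json.dumps(judge_response.to_dict(), indent=2)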
