 
 import chevron
 
+from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder
 from ldai.models import AIJudgeConfig, LDMessage
 from ldai.providers.ai_provider import AIProvider
-from ldai.providers.types import ChatResponse, EvalScore, JudgeResponse, StructuredResponse
+from ldai.providers.types import (ChatResponse, EvalScore, JudgeResponse,
+                                  StructuredResponse)
 from ldai.tracker import LDAIConfigTracker
-from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder
 
 
 class AIJudge:
     """
     Judge implementation that handles evaluation functionality and conversation management.
-
+
     According to the AIEval spec, judges are AI Configs with mode: "judge" that evaluate
     other AI Configs using structured output.
     """
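
A reading aid, not part of the patch: the class above is wired together from the three collaborators named in __init__ below. A minimal construction sketch, assuming the config, tracker, and provider have already been resolved from the SDK; all variable names are placeholders.

# Hypothetical wiring; obtaining these three objects from the
# LaunchDarkly SDK is outside the scope of this diff.
judge = AIJudge(
    ai_config=judge_config,        # AIJudgeConfig with mode "judge"
    ai_config_tracker=tracker,     # LDAIConfigTracker for the judge config
    ai_provider=provider,          # any AIProvider implementation
)
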
@@ -29,7 +30,7 @@ def __init__(
     ):
         """
         Initialize the Judge.
-
+
         :param ai_config: The judge AI configuration
         :param ai_config_tracker: The tracker for the judge configuration
         :param ai_provider: The AI provider to use for evaluation
@@ -51,7 +52,7 @@ async def evaluate(
     ) -> Optional[JudgeResponse]:
         """
         Evaluates an AI response using the judge's configuration.
-
+
         :param input_text: The input prompt or question that was provided to the AI
         :param output_text: The AI-generated response to be evaluated
         :param sampling_rate: Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1)
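
For context on evaluate(): a minimal usage sketch, assuming an AIJudge instance is at hand. The texts are made up, and the .evals attribute on JudgeResponse is an assumption inferred from _parse_evaluation_response below.

# Hypothetical call; returns None when sampled out by sampling_rate.
result = await judge.evaluate(
    input_text="What is 2 + 2?",
    output_text="2 + 2 equals 4.",
    sampling_rate=1.0,
)
if result is not None:
    for metric_key, eval_score in result.evals.items():  # .evals is assumed
        print(metric_key, eval_score.score, eval_score.reasoning)
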
@@ -113,7 +114,7 @@ async def evaluate_messages(
     ) -> Optional[JudgeResponse]:
         """
         Evaluates an AI response from chat messages and response.
-
+
         :param messages: Array of messages representing the conversation history
         :param response: The AI response to be evaluated
         :param sampling_ratio: Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1)
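
Similarly for evaluate_messages(): a sketch assuming LDMessage takes role/content keywords and that a ChatResponse was captured from an earlier provider call; both shapes are assumptions, not confirmed by this diff.

# Hypothetical chat evaluation; `chat_response` is a ChatResponse obtained
# from a prior (assumed) provider call.
history = [LDMessage(role="user", content="Summarize our refund policy.")]
result = await judge.evaluate_messages(
    messages=history,
    response=chat_response,
    sampling_ratio=1.0,
)
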
@@ -127,31 +128,31 @@ async def evaluate_messages(
     def get_ai_config(self) -> AIJudgeConfig:
         """
         Returns the AI Config used by this judge.
-
+
         :return: The judge AI configuration
         """
         return self._ai_config
 
     def get_tracker(self) -> LDAIConfigTracker:
         """
         Returns the tracker associated with this judge.
-
+
         :return: The tracker for the judge configuration
         """
         return self._ai_config_tracker
 
     def get_provider(self) -> AIProvider:
         """
         Returns the AI provider used by this judge.
-
+
         :return: The AI provider
         """
         return self._ai_provider
 
     def _construct_evaluation_messages(self, input_text: str, output_text: str) -> list[LDMessage]:
         """
         Constructs evaluation messages by combining judge's config messages with input/output.
-
+
         :param input_text: The input text
         :param output_text: The output text to evaluate
         :return: List of messages for evaluation
@@ -173,7 +174,7 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l
     def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str:
         """
         Interpolates message content with variables using Mustache templating.
-
+
         :param content: The message content template
         :param variables: Variables to interpolate
         :return: Interpolated message content
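
_interpolate_message delegates to chevron, the Mustache implementation imported at the top of this file. A standalone sketch of the same templating step; the template text and variable names are illustrative, not the judge's actual prompt.

import chevron

# chevron.render substitutes {{placeholders}} from a plain dict.
template = "Rate this answer to {{input_text}}: {{output_text}}"
rendered = chevron.render(template, {
    "input_text": "What is 2 + 2?",
    "output_text": "2 + 2 equals 4.",
})
# -> "Rate this answer to What is 2 + 2?: 2 + 2 equals 4."
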
@@ -184,7 +185,7 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str:
     def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]:
         """
         Parses the structured evaluation response from the AI provider.
-
+
         :param data: The structured response data
         :return: Dictionary of evaluation scores keyed by metric key
         """
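
A sketch of the parsing contract, assuming each metric arrives as an object with score and reasoning fields; the real schema is produced by EvaluationSchemaBuilder and is not shown in this diff.

# Assumed provider payload shape and the EvalScore mapping it yields,
# mirroring the EvalScore construction in the hunk below.
data = {
    "relevance": {"score": 0.9, "reasoning": "Directly answers the question."},
    "conciseness": {"score": 0.7, "reasoning": "Slightly verbose."},
}
scores = {
    key: EvalScore(score=float(value["score"]), reasoning=value["reasoning"])
    for key, value in data.items()
}
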
@@ -227,5 +228,3 @@ def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScor
             results[metric_key] = EvalScore(score=float(score), reasoning=reasoning)
 
         return results
-
-