Skip to content

Commit 2bf8872

Browse files
committed
feat: Automatically judge chat results based on AI Config
1 parent 2f9854c commit 2bf8872

File tree

4 files changed

+112
-4
lines changed

4 files changed

+112
-4
lines changed

packages/sdk/server-ai/src/LDAIClientImpl.ts

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,8 +217,25 @@ export class LDAIClientImpl implements LDAIClient {
217217
return undefined;
218218
}
219219

220-
// Create the TrackedChat instance with the provider
221-
return new TrackedChat(config, config.tracker, provider);
220+
// Initialize judges if they are configured
221+
const judges: Record<string, Judge> = {};
222+
if (config.judgeConfiguration?.judges) {
223+
for (const judgeConfig of config.judgeConfiguration.judges) {
224+
const judge = await this.initJudge(
225+
judgeConfig.key,
226+
context,
227+
{ enabled: false },
228+
variables,
229+
defaultAiProvider,
230+
);
231+
if (judge) {
232+
judges[judgeConfig.key] = judge;
233+
}
234+
}
235+
}
236+
237+
// Create the TrackedChat instance with the provider, judges, and logger
238+
return new TrackedChat(config, config.tracker, provider, judges, this._logger);
222239
}
223240

224241
async initJudge(
@@ -232,7 +249,19 @@ export class LDAIClientImpl implements LDAIClient {
232249
this._ldClient.track(TRACK_JUDGE_INIT, context, key, 1);
233250

234251
try {
235-
// Add standard judge variables to incoming variables
252+
// Log a warning for each reserved key present in the incoming variables
253+
if (variables?.message_history !== undefined) {
254+
this._logger?.warn(
255+
"The variable 'message_history' is reserved by the judge and will be ignored."
256+
);
257+
}
258+
if (variables?.response_to_evaluate !== undefined) {
259+
this._logger?.warn(
260+
"The variable 'response_to_evaluate' is reserved by the judge and will be ignored."
261+
);
262+
}
263+
264+
// Add the standard judge variables to the incoming variables, overwriting any reserved keys
236265
const extendedVariables = {
237266
...variables,
238267
message_history: '{{message_history}}',

packages/sdk/server-ai/src/api/chat/TrackedChat.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
import { LDLogger } from '@launchdarkly/js-server-sdk-common';
2+
13
import { LDAIConfigTracker } from '../config/LDAIConfigTracker';
24
import { LDAIConversationConfig, LDMessage } from '../config/types';
5+
import { Judge } from '../judge/Judge';
6+
import { JudgeResponse } from '../judge/types';
37
import { AIProvider } from '../providers/AIProvider';
48
import { ChatResponse } from './types';
59

@@ -11,13 +15,19 @@ import { ChatResponse } from './types';
1115
*/
1216
export class TrackedChat {
1317
protected messages: LDMessage[];
18+
protected judges: Record<string, Judge>;
19+
private readonly _logger?: LDLogger;
1420

1521
constructor(
1622
protected readonly aiConfig: LDAIConversationConfig,
1723
protected readonly tracker: LDAIConfigTracker,
1824
protected readonly provider: AIProvider,
25+
judges?: Record<string, Judge>,
26+
logger?: LDLogger,
1927
) {
2028
this.messages = [];
29+
this.judges = judges || {};
30+
this._logger = logger;
2131
}
2232

2333
/**
@@ -45,9 +55,63 @@ export class TrackedChat {
4555
// Add the assistant response to the conversation history
4656
this.messages.push(response.message);
4757

58+
// Start judge evaluations if configured
59+
if (
60+
this.aiConfig.judgeConfiguration?.judges &&
61+
this.aiConfig.judgeConfiguration.judges.length > 0
62+
) {
63+
response.evaluations = this._evaluateWithJudges(this.messages, response);
64+
}
65+
4866
return response;
4967
}
5068

69+
/**
70+
* Evaluates the response with all configured judges.
71+
* Returns a promise that resolves to an array of evaluation results.
72+
*
73+
* @param messages Array of messages representing the conversation history
74+
* @param response The AI response to be evaluated
75+
* @returns Promise resolving to array of judge evaluation results
76+
*/
77+
private async _evaluateWithJudges(
78+
messages: LDMessage[],
79+
response: ChatResponse,
80+
): Promise<Array<JudgeResponse | undefined>> {
81+
const judgeConfigs = this.aiConfig.judgeConfiguration!.judges;
82+
83+
// Start all judge evaluations in parallel
84+
const evaluationPromises = judgeConfigs.map(async (judgeConfig) => {
85+
const judge = this.judges[judgeConfig.key];
86+
if (!judge) {
87+
this._logger?.warn(
88+
`Judge configuration is not enabled: ${judgeConfig.key}`,
89+
this.tracker.getTrackData(),
90+
);
91+
return undefined;
92+
}
93+
94+
const evalResult = await judge.evaluateMessages(
95+
messages,
96+
response,
97+
judgeConfig.samplingRate,
98+
);
99+
100+
// Track scores if evaluation was successful
101+
if (evalResult && evalResult.success) {
102+
this.tracker.trackEvalScores(evalResult.evals);
103+
}
104+
105+
return evalResult;
106+
});
107+
108+
// Use Promise.allSettled to ensure all evaluations complete
109+
// even if some fail
110+
const results = await Promise.allSettled(evaluationPromises);
111+
112+
return results.map((result) => (result.status === 'fulfilled' ? result.value : undefined));
113+
}
114+
51115
/**
52116
* Get the underlying AI configuration used to initialize this TrackedChat.
53117
*/
@@ -70,6 +134,14 @@ export class TrackedChat {
70134
return this.provider;
71135
}
72136

137+
/**
138+
* Get the judges associated with this TrackedChat.
139+
* Returns a record of judge instances keyed by their configuration keys.
140+
*/
141+
getJudges(): Record<string, Judge> {
142+
return this.judges;
143+
}
144+
73145
/**
74146
* Append messages to the conversation history.
75147
* Adds messages to the conversation history without invoking the model,

packages/sdk/server-ai/src/api/chat/types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { LDMessage } from '../config/types';
2+
import { JudgeResponse } from '../judge/types';
23
import { LDAIMetrics } from '../metrics/LDAIMetrics';
34

45
/**
@@ -14,4 +15,10 @@ export interface ChatResponse {
1415
* Metrics information including success status and token usage.
1516
*/
1617
metrics: LDAIMetrics;
18+
19+
/**
20+
* Promise that resolves to judge evaluation results.
21+
* Only present when judges are configured for evaluation.
22+
*/
23+
evaluations?: Promise<Array<JudgeResponse | undefined>>;
1724
}

packages/sdk/server-ai/src/api/config/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ export interface LDAIJudgeConfigDefault extends LDAIConfigDefault {
114114
* Evaluation metric keys for judge configurations.
115115
* The keys of the metrics that this judge can evaluate.
116116
*/
117-
evaluationMetricKeys: string[];
117+
evaluationMetricKeys?: string[];
118118
}
119119

120120
/**

0 commit comments

Comments
 (0)