Skip to content

Commit e0624c1

Browse files
authored
Patch evaluator response casting (#931)
# why
Evaluator was failing to parse responses from LLMs.

# what changed
Temporary cast for `createChatCompletion`.

# test plan
1 parent 02d2b62 commit e0624c1

File tree

1 file changed

+10
-5
lines changed

1 file changed

+10
-5
lines changed

evals/evaluator.ts

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@ import {
1717
EvaluationResult,
1818
BatchEvaluateOptions,
1919
} from "@/types/evaluator";
20+
import { LLMParsedResponse } from "@/lib/inference";
21+
import { LLMResponse } from "@/lib/llm/LLMClient";
2022

2123
dotenv.config();
2224

@@ -66,7 +68,9 @@ export class Evaluator {
6668
this.modelClientOptions,
6769
);
6870

69-
const response = await llmClient.createChatCompletion({
71+
const response = await llmClient.createChatCompletion<
72+
LLMParsedResponse<LLMResponse>
73+
>({
7074
logger: this.stagehand.logger,
7175
options: {
7276
messages: [
@@ -76,8 +80,7 @@ export class Evaluator {
7680
image: { buffer: imageBuffer },
7781
},
7882
});
79-
80-
const rawResponse = response.choices[0].message.content;
83+
const rawResponse = response.data as unknown as string;
8184
let evaluationResult: "YES" | "NO" | "INVALID" = "INVALID";
8285
let reasoning = `Failed to process response. Raw response: ${rawResponse}`;
8386

@@ -183,7 +186,9 @@ export class Evaluator {
183186
);
184187

185188
// Use the model-specific LLM client to evaluate the screenshot with all questions
186-
const response = await llmClient.createChatCompletion({
189+
const response = await llmClient.createChatCompletion<
190+
LLMParsedResponse<LLMResponse>
191+
>({
187192
logger: this.stagehand.logger,
188193
options: {
189194
messages: [
@@ -202,7 +207,7 @@ export class Evaluator {
202207
},
203208
});
204209

205-
const rawResponse = response.choices[0].message.content;
210+
const rawResponse = response.data as unknown as string;
206211
let finalResults: EvaluationResult[] = [];
207212

208213
try {

0 commit comments

Comments
 (0)