Patch evaluator response casting (#931)

miguelg719 · web-flow · commit e0624c126a33 · 2025-07-30T14:16:48.000-07:00
# why
The evaluator was failing to parse the responses returned by the LLM client.

# what changed
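Added a temporary cast around `createChatCompletion` in `evals/evaluator.ts`: both call sites now pass `LLMParsedResponse<LLMResponse>` as the type argument and read the raw response from `response.data` (cast to `string`) instead of `response.choices[0].message.content`.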

# test plan
diff --git a/evals/evaluator.ts b/evals/evaluator.ts
@@ -17,6 +17,8 @@ import {
   EvaluationResult,
   BatchEvaluateOptions,
 } from "@/types/evaluator";
+import { LLMParsedResponse } from "@/lib/inference";
+import { LLMResponse } from "@/lib/llm/LLMClient";
 
 dotenv.config();
 
@@ -66,7 +68,9 @@ export class Evaluator {
       this.modelClientOptions,
     );
 
-    const response = await llmClient.createChatCompletion({
+    const response = await llmClient.createChatCompletion<
+      LLMParsedResponse<LLMResponse>
+    >({
       logger: this.stagehand.logger,
       options: {
         messages: [
@@ -76,8 +80,7 @@ export class Evaluator {
         image: { buffer: imageBuffer },
       },
     });
-
-    const rawResponse = response.choices[0].message.content;
+    const rawResponse = response.data as unknown as string;
     let evaluationResult: "YES" | "NO" | "INVALID" = "INVALID";
     let reasoning = `Failed to process response. Raw response: ${rawResponse}`;
 
@@ -183,7 +186,9 @@ export class Evaluator {
     );
 
     // Use the model-specific LLM client to evaluate the screenshot with all questions
-    const response = await llmClient.createChatCompletion({
+    const response = await llmClient.createChatCompletion<
+      LLMParsedResponse<LLMResponse>
+    >({
       logger: this.stagehand.logger,
       options: {
         messages: [
@@ -202,7 +207,7 @@ export class Evaluator {
       },
     });
 
-    const rawResponse = response.choices[0].message.content;
+    const rawResponse = response.data as unknown as string;
     let finalResults: EvaluationResult[] = [];
 
     try {