@@ -9,7 +9,7 @@ import { createClient } from '@arizeai/phoenix-client';
99import { getDatasetInfo } from '@arizeai/phoenix-client/datasets' ;
1010// eslint-disable-next-line import/extensions
1111import { asEvaluator , runExperiment } from '@arizeai/phoenix-client/experiments' ;
12- import type { ExperimentTask } from '@arizeai/phoenix-client/types/experiments' ;
12+ import type { ExperimentEvaluationRun , ExperimentTask } from '@arizeai/phoenix-client/types/experiments' ;
1313import dotenv from 'dotenv' ;
1414import OpenAI from 'openai' ;
1515
@@ -31,16 +31,6 @@ process.env.PHOENIX_API_KEY = sanitizeHeaderValue(process.env.PHOENIX_API_KEY);
3131
3232type ExampleInputOnly = { input : Record < string , unknown > , metadata ?: Record < string , unknown > , output ?: never } ;
3333
34- // Type for Phoenix evaluation run results
35- interface EvaluationRun {
36- name : string ;
37- result ?: {
38- score ?: number ;
39- [ key : string ] : unknown ;
40- } ;
41- [ key : string ] : unknown ;
42- }
43-
4434async function loadTools ( ) : Promise < ToolBase [ ] > {
4535 const apifyClient = new ApifyClient ( { token : process . env . APIFY_API_TOKEN || '' } ) ;
4636 const urlTools = await processParamsGetTools ( '' , apifyClient ) ;
@@ -187,12 +177,13 @@ async function main(): Promise<number> {
187177 try {
188178 const info = await getDatasetInfo ( { client, dataset : { datasetName : DATASET_NAME } } ) ;
189179 datasetId = info ?. id as string | undefined ;
190- if ( ! datasetId ) throw new Error ( `Dataset "${ DATASET_NAME } " not found` ) ;
191180 } catch ( e ) {
192181 log . error ( `Error loading dataset: ${ e } ` ) ;
193182 return 1 ;
194183 }
195184
185+ if ( ! datasetId ) throw new Error ( `Dataset "${ DATASET_NAME } " not found` ) ;
186+
196187 log . info ( `Loaded dataset "${ DATASET_NAME } " with ID: ${ datasetId } ` ) ;
197188
198189 const results : { model : string ; accuracy : number ; correct : number ; total : number ; experiment_id ?: string ; error ?: string } [ ] = [ ] ;
@@ -236,8 +227,8 @@ async function main(): Promise<number> {
236227 const runsMap = experiment . runs ?? { } ;
237228 const evalRuns = experiment . evaluationRuns ?? [ ] ;
238229 totalCases = Object . keys ( runsMap ) . length ;
239- const toolMatchEvals = evalRuns . filter ( ( er : EvaluationRun ) => er . name === 'tools_match' ) ;
240- correctCases = toolMatchEvals . filter ( ( er : EvaluationRun ) => ( er . result ?. score ?? 0 ) > 0.5 ) . length ;
230+ const toolMatchEvals = evalRuns . filter ( ( er : ExperimentEvaluationRun ) => er . name === 'tools_match' ) ;
231+ correctCases = toolMatchEvals . filter ( ( er : ExperimentEvaluationRun ) => ( er . result ?. score ?? 0 ) > 0.5 ) . length ;
241232 accuracy = totalCases > 0 ? correctCases / totalCases : 0 ;
242233 experimentId = experiment . id ;
243234
@@ -261,7 +252,7 @@ async function main(): Promise<number> {
261252 }
262253 }
263254
264- const allPassed = results . filter ( ( r ) => ! r . error ) . every ( ( r ) => r . accuracy >= PASS_THRESHOLD ) ;
255+ const allPassed = results . every ( ( r ) => ! r . error && r . accuracy >= PASS_THRESHOLD ) ;
265256 log . info ( `Pass threshold: ${ ( PASS_THRESHOLD * 100 ) . toFixed ( 1 ) } %` ) ;
266257 if ( allPassed ) {
267258 log . info ( '✅ All models passed the threshold' ) ;
0 commit comments