@@ -37,28 +37,37 @@ export function parameterMatchingAccuracyScorer(
37
37
return 1 ;
38
38
}
39
39
40
- const toolCallScores : number [ ] = [ ] ;
41
- const checkedToolCallIds = new Set < string > ( ) ;
40
+ const usedActualIndexes = new Set < number > ( ) ;
41
+ const scores : number [ ] = [ ] ;
42
42
43
- for ( const expectedToolCall of expectedToolCalls ) {
44
- const matchingActualToolCall = actualToolCalls . find (
45
- ( actualToolCall ) =>
46
- actualToolCall . toolName === expectedToolCall . toolName &&
47
- ! checkedToolCallIds . has ( actualToolCall . toolCallId )
48
- ) ;
43
+ for ( const expectedCall of expectedToolCalls ) {
44
+ // Find all unmatched actual tool calls with the same tool name
45
+ const candidates = actualToolCalls
46
+ . map ( ( call , index ) => ( { call, index } ) )
47
+ . filter ( ( { call, index } ) => ! usedActualIndexes . has ( index ) && call . toolName === expectedCall . toolName ) ;
49
48
50
- if ( ! matchingActualToolCall ) {
51
- toolCallScores . push ( 0 ) ;
49
+ if ( candidates . length === 0 ) {
50
+ scores . push ( 0 ) ;
52
51
continue ;
53
52
}
54
53
55
- checkedToolCallIds . add ( matchingActualToolCall . toolCallId ) ;
56
- const score = compareParams ( expectedToolCall . parameters , matchingActualToolCall . parameters ) ;
57
- toolCallScores . push ( score ) ;
54
+ // Pick the candidate with the best parameter match
55
+ let bestScore = - 1 ;
56
+ let bestIndex = - 1 ;
57
+ for ( const { call, index } of candidates ) {
58
+ const score = compareParams ( expectedCall . parameters , call . parameters ) ;
59
+ if ( score > bestScore ) {
60
+ bestScore = score ;
61
+ bestIndex = index ;
62
+ }
63
+ }
64
+
65
+ usedActualIndexes . add ( bestIndex ) ;
66
+ scores . push ( bestScore ) ;
58
67
}
59
68
60
- const totalScore = toolCallScores . reduce ( ( sum , score ) => sum + score , 0 ) ;
61
- return totalScore / toolCallScores . length ;
69
+ const totalScore = scores . reduce ( ( sum , score ) => sum + score , 0 ) ;
70
+ return totalScore / scores . length ;
62
71
}
63
72
64
73
/**
0 commit comments