Skip to content

Commit f79faca

Browse files
chore: fixes accuracy scorer for position-independent matching
1 parent c5365ac commit f79faca

File tree

1 file changed

+24
-15
lines changed

1 file changed

+24
-15
lines changed

tests/accuracy/sdk/accuracy-scorers.ts

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -37,28 +37,37 @@ export function parameterMatchingAccuracyScorer(
3737
return 1;
3838
}
3939

40-
const toolCallScores: number[] = [];
41-
const checkedToolCallIds = new Set<string>();
40+
const usedActualIndexes = new Set<number>();
41+
const scores: number[] = [];
4242

43-
for (const expectedToolCall of expectedToolCalls) {
44-
const matchingActualToolCall = actualToolCalls.find(
45-
(actualToolCall) =>
46-
actualToolCall.toolName === expectedToolCall.toolName &&
47-
!checkedToolCallIds.has(actualToolCall.toolCallId)
48-
);
43+
for (const expectedCall of expectedToolCalls) {
44+
// Find all unmatched actual tool calls with the same tool name
45+
const candidates = actualToolCalls
46+
.map((call, index) => ({ call, index }))
47+
.filter(({ call, index }) => !usedActualIndexes.has(index) && call.toolName === expectedCall.toolName);
4948

50-
if (!matchingActualToolCall) {
51-
toolCallScores.push(0);
49+
if (candidates.length === 0) {
50+
scores.push(0);
5251
continue;
5352
}
5453

55-
checkedToolCallIds.add(matchingActualToolCall.toolCallId);
56-
const score = compareParams(expectedToolCall.parameters, matchingActualToolCall.parameters);
57-
toolCallScores.push(score);
54+
// Pick the candidate with the best parameter match
55+
let bestScore = -1;
56+
let bestIndex = -1;
57+
for (const { call, index } of candidates) {
58+
const score = compareParams(expectedCall.parameters, call.parameters);
59+
if (score > bestScore) {
60+
bestScore = score;
61+
bestIndex = index;
62+
}
63+
}
64+
65+
usedActualIndexes.add(bestIndex);
66+
scores.push(bestScore);
5867
}
5968

60-
const totalScore = toolCallScores.reduce((sum, score) => sum + score, 0);
61-
return totalScore / toolCallScores.length;
69+
const totalScore = scores.reduce((sum, score) => sum + score, 0);
70+
return totalScore / scores.length;
6271
}
6372

6473
/**

0 commit comments

Comments
 (0)