Skip to content

Commit d3081d3

Browse files
committed
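Fixed bug in output where a required tool that was never called was reported twice (once by validateToolCalls and again by validateToolCallSuccess)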
1 parent c03c3ce commit d3081d3

File tree

2 files changed

+28
-4
lines changed

2 files changed

+28
-4
lines changed

src/testing/evals/runner.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ export class EvalTestRunner {
169169
// Validate tool call success for required tools
170170
if (test.expected_tool_calls?.required && conversationResult.success) {
171171
const toolSuccessErrors = this.validateToolCallSuccess(
172+
conversationResult.toolCalls,
172173
conversationResult.toolResults,
173174
test.expected_tool_calls.required
174175
);
@@ -268,12 +269,19 @@ export class EvalTestRunner {
268269
}
269270

270271
private validateToolCallSuccess(
272+
toolCalls: ToolCall<string, Record<string, unknown>>[],
271273
toolResults: ToolResult<string, Record<string, unknown>, unknown>[],
272274
requiredTools: string[]
273275
): string[] {
274276
const errors: string[] = [];
277+
const calledToolNames = toolCalls.map(call => call.toolName);
275278

276279
for (const requiredTool of requiredTools) {
280+
// Only validate success if the tool was actually called
281+
if (!calledToolNames.includes(requiredTool)) {
282+
continue; // Skip - this case is already handled by validateToolCalls
283+
}
284+
277285
// Find results for this required tool
278286
const resultsForTool = toolResults.filter(tr => tr.toolName === requiredTool);
279287

test/unit/tool-validation.test.ts

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,15 @@ describe('Tool Call Validation', () => {
119119
// Test the validateToolCallSuccess method
120120
const validateToolCallSuccess = (runner as any).validateToolCallSuccess.bind(runner);
121121

122-
// Mock tool results with error
122+
// Mock tool calls and results with error
123+
const toolCalls = [
124+
{
125+
toolCallId: '1',
126+
toolName: 'run_flow_files',
127+
args: {},
128+
},
129+
];
130+
123131
const toolResults = [
124132
{
125133
toolCallId: '1',
@@ -134,7 +142,7 @@ describe('Tool Call Validation', () => {
134142

135143
const requiredTools = ['run_flow_files'];
136144

137-
const errors = validateToolCallSuccess(toolResults, requiredTools);
145+
const errors = validateToolCallSuccess(toolCalls, toolResults, requiredTools);
138146

139147
// Should produce an error because the required tool failed
140148
expect(errors.length).toBeGreaterThan(0);
@@ -159,7 +167,15 @@ describe('Tool Call Validation', () => {
159167
// Test the validateToolCallSuccess method
160168
const validateToolCallSuccess = (runner as any).validateToolCallSuccess.bind(runner);
161169

162-
// Mock tool results with success
170+
// Mock tool calls and results with success
171+
const toolCalls = [
172+
{
173+
toolCallId: '1',
174+
toolName: 'run_flow_files',
175+
args: {},
176+
},
177+
];
178+
163179
const toolResults = [
164180
{
165181
toolCallId: '1',
@@ -174,7 +190,7 @@ describe('Tool Call Validation', () => {
174190

175191
const requiredTools = ['run_flow_files'];
176192

177-
const errors = validateToolCallSuccess(toolResults, requiredTools);
193+
const errors = validateToolCallSuccess(toolCalls, toolResults, requiredTools);
178194

179195
// Should not produce any errors
180196
expect(errors).toEqual([]);

0 commit comments

Comments
 (0)