Skip to content

Commit 6c2b10c

Browse files
committed
fix: propagate eval failures in CLI exit code
1 parent 84bef76 commit 6c2b10c

File tree

2 files changed

+279
-2
lines changed

2 files changed

+279
-2
lines changed

packages/ai/src/evals/run-vitest.ts

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,24 @@ const printCollectedEvals = (result: TestRunResult, rootDir: string) => {
5050
console.log(c.bold(`Total: ${totalEvals} evaluations, ${totalCases} test cases\n`));
5151
};
5252

53+
/**
 * Decides whether a finished run should be reported as failed.
 *
 * Returns `true` when any of the following holds, checked in this order:
 *  1. `failedTestCount` is greater than zero;
 *  2. `result.unhandledErrors` is non-empty and
 *     `dangerouslyIgnoreUnhandledErrors` is `false`;
 *  3. at least one entry of `result.testModules` reports `state() === 'failed'`
 *     or a falsy `ok()`.
 * Otherwise returns `false`.
 *
 * @param result - Run result exposing `unhandledErrors` and `testModules`.
 * @param failedTestCount - Number of failed tests counted by the caller.
 * @param dangerouslyIgnoreUnhandledErrors - When `true`, unhandled errors alone do not mark the run as failed.
 * @returns `true` if the run should be treated as failed.
 */
const hasVitestFailures = (
54+
result: TestRunResult,
55+
failedTestCount: number,
56+
dangerouslyIgnoreUnhandledErrors: boolean,
57+
): boolean => {
58+
// Any counted failed test marks the run as failed.
if (failedTestCount > 0) {
59+
return true;
60+
}
61+
62+
// Unhandled errors fail the run unless the ignore flag is set.
if (result.unhandledErrors.length > 0 && !dangerouslyIgnoreUnhandledErrors) {
63+
return true;
64+
}
65+
66+
// Last resort: a module can be failed even when neither check above fired.
return result.testModules.some(
67+
(testModule) => testModule.state() === 'failed' || !testModule.ok(),
68+
);
69+
};
70+
5371
export const runVitest = async (
5472
dir: string,
5573
opts: {
@@ -153,7 +171,7 @@ export const runVitest = async (
153171
}
154172

155173
// Start collection and execution
156-
await vi.start();
174+
const result = await vi.start();
157175

158176
// After execution, check if validation failed
159177
if (existsSync(abortFile)) {
@@ -166,10 +184,17 @@ export const runVitest = async (
166184
const dispose = registerConsoleShortcuts(vi, process.stdin, process.stdout);
167185

168186
if (!vi.shouldKeepServer()) {
187+
const exitCode = hasVitestFailures(
188+
result,
189+
vi.state.getCountOfFailedTests(),
190+
vi.config.dangerouslyIgnoreUnhandledErrors,
191+
)
192+
? 1
193+
: 0;
169194
dispose();
170195
await flush();
171196
await vi.close();
172-
process.exit(0);
197+
process.exit(exitCode);
173198
}
174199

175200
await flush();
Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
import { writeFileSync } from 'node:fs';
2+
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
3+
import type { ResolvedAxiomConfig } from '../../src/config/index';
4+
5+
// Shared mock functions. They are created through `vi.hoisted` so that the
// `vi.mock` factories below can reference them.
const mocks = vi.hoisted(() => ({
6+
createVitest: vi.fn(),
7+
registerConsoleShortcuts: vi.fn(),
8+
flush: vi.fn(),
9+
initInstrumentation: vi.fn(),
10+
setAxiomConfig: vi.fn(),
11+
}));
12+
13+
// Replace the 'vitest/node' exports used here with the mocks above.
vi.mock('vitest/node', () => ({
14+
createVitest: mocks.createVitest,
15+
registerConsoleShortcuts: mocks.registerConsoleShortcuts,
16+
}));
17+
18+
// Replace the instrumentation module's `flush` and `initInstrumentation`.
vi.mock('../../src/evals/instrument', () => ({
19+
flush: mocks.flush,
20+
initInstrumentation: mocks.initInstrumentation,
21+
}));
22+
23+
// Replace the context storage module's `setAxiomConfig`.
vi.mock('../../src/evals/context/storage', () => ({
24+
setAxiomConfig: mocks.setAxiomConfig,
25+
}));
26+
27+
import { runVitest } from '../../src/evals/run-vitest';
28+
29+
// Minimal test-module stand-in: only the `ok()` and `state()` accessors
// that the mocked run results in this file provide.
type MockTestModule = {
30+
ok: () => boolean;
31+
state: () => string;
32+
};
33+
34+
// Shape of the fake Vitest instance returned by the mocked `createVitest`.
// Every method is a `vi.fn` so tests can assert on calls and override behavior.
type MockVitestInstance = {
35+
collect: ReturnType<typeof vi.fn>;
36+
close: ReturnType<typeof vi.fn>;
37+
shouldKeepServer: ReturnType<typeof vi.fn>;
38+
start: ReturnType<typeof vi.fn>;
39+
config: {
40+
dangerouslyIgnoreUnhandledErrors: boolean;
41+
};
42+
state: {
43+
getCountOfFailedTests: ReturnType<typeof vi.fn>;
44+
};
45+
};
46+
47+
// Fixed configuration fixture passed to `runVitest` through `baseOptions`.
// All values are static placeholders.
const resolvedConfig: ResolvedAxiomConfig = {
48+
eval: {
49+
url: 'https://api.axiom.co',
50+
edgeUrl: 'https://api.axiom.co',
51+
token: 'axiom-token',
52+
dataset: 'axiom-dataset',
53+
orgId: '',
54+
flagSchema: null,
55+
instrumentation: null,
56+
timeoutMs: 60_000,
57+
include: ['**/*.eval.ts'],
58+
exclude: ['**/node_modules/**'],
59+
},
60+
};
61+
62+
// Default options for every `runVitest` call in this file (non-watch run);
// individual tests spread this object and override fields such as `list`.
const baseOptions = {
63+
watch: false,
64+
include: ['**/*.eval.ts'],
65+
config: resolvedConfig,
66+
runId: 'RUNVITESTTEST',
67+
};
68+
69+
/**
 * Builds a mock test module whose `state()` and `ok()` return the given values.
 *
 * @param state - Value returned by `state()`; defaults to `'passed'`.
 * @param ok - Value returned by `ok()`; defaults to `true`.
 */
const createTestModule = (state = 'passed', ok = true): MockTestModule => ({
70+
ok: () => ok,
71+
state: () => state,
72+
});
73+
74+
/**
 * Builds a fake Vitest instance for the mocked `createVitest` to resolve with.
 *
 * With no arguments the instance represents a clean run: zero failed tests,
 * `shouldKeepServer()` returning `false`, unhandled errors not ignored,
 * `start()` resolving to one passing module with no unhandled errors, and
 * `collect()` resolving to an empty result.
 *
 * Options (all optional):
 *  - `failedTests`: value returned by `state.getCountOfFailedTests()`.
 *  - `shouldKeepServer`: value returned by `shouldKeepServer()`.
 *  - `dangerouslyIgnoreUnhandledErrors`: exposed as `config.dangerouslyIgnoreUnhandledErrors`.
 *  - `startResult`: value `start()` resolves to.
 *  - `collectResult`: value `collect()` resolves to.
 */
const createVitestInstance = ({
75+
failedTests = 0,
76+
shouldKeepServer = false,
77+
dangerouslyIgnoreUnhandledErrors = false,
78+
startResult = {
79+
testModules: [createTestModule()],
80+
unhandledErrors: [],
81+
},
82+
collectResult = {
83+
testModules: [],
84+
unhandledErrors: [],
85+
},
86+
}: {
87+
failedTests?: number;
88+
shouldKeepServer?: boolean;
89+
dangerouslyIgnoreUnhandledErrors?: boolean;
90+
startResult?: {
91+
testModules: MockTestModule[];
92+
unhandledErrors: unknown[];
93+
};
94+
collectResult?: {
95+
testModules: MockTestModule[];
96+
unhandledErrors: unknown[];
97+
};
98+
} = {}): MockVitestInstance => ({
99+
collect: vi.fn().mockResolvedValue(collectResult),
100+
close: vi.fn().mockResolvedValue(undefined),
101+
shouldKeepServer: vi.fn(() => shouldKeepServer),
102+
start: vi.fn().mockResolvedValue(startResult),
103+
config: {
104+
dangerouslyIgnoreUnhandledErrors,
105+
},
106+
state: {
107+
getCountOfFailedTests: vi.fn(() => failedTests),
108+
},
109+
});
110+
111+
describe('runVitest', () => {
112+
let exitSpy: ReturnType<typeof vi.spyOn>;
113+
114+
// Per-test setup: clear the mocks' recorded calls, install default resolved
// values, and stub `process.exit`.
beforeEach(() => {
115+
vi.clearAllMocks();
116+
mocks.flush.mockResolvedValue(undefined);
117+
mocks.initInstrumentation.mockResolvedValue(undefined);
118+
mocks.registerConsoleShortcuts.mockReturnValue(vi.fn());
119+
// The stub throws instead of exiting, so each test can read the exit code
// from the rejection message (`process.exit:<code>`).
exitSpy = vi.spyOn(process, 'exit').mockImplementation(((code?: number) => {
120+
throw new Error(`process.exit:${code}`);
121+
}) as never);
122+
});
123+
124+
// Per-test teardown: restore the real `process.exit` and remove the two
// AXIOM_* environment variables so they cannot leak between tests.
// NOTE(review): presumably `runVitest` sets these variables - confirm against its source.
afterEach(() => {
125+
exitSpy.mockRestore();
126+
delete process.env.AXIOM_NAME_REGISTRY_FILE;
127+
delete process.env.AXIOM_ABORT_FILE;
128+
});
129+
130+
// Clean run with the default fake instance: expects exit code 0 and that
// shutdown happens in the order dispose -> flush -> close.
it('exits 0 after a successful non-watch run', async () => {
131+
const callOrder: string[] = [];
132+
const dispose = vi.fn(() => {
133+
callOrder.push('dispose');
134+
});
135+
const vitest = createVitestInstance();
136+
137+
mocks.createVitest.mockResolvedValue(vitest);
138+
mocks.registerConsoleShortcuts.mockReturnValue(dispose);
139+
mocks.flush.mockImplementation(async () => {
140+
callOrder.push('flush');
141+
});
142+
vitest.close.mockImplementation(async () => {
143+
callOrder.push('close');
144+
});
145+
146+
// The stubbed process.exit throws, so the exit code surfaces as a rejection.
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:0');
147+
148+
expect(vitest.start).toHaveBeenCalledTimes(1);
149+
expect(vitest.state.getCountOfFailedTests).toHaveBeenCalledTimes(1);
150+
expect(dispose).toHaveBeenCalledTimes(1);
151+
expect(callOrder).toEqual(['dispose', 'flush', 'close']);
152+
});
153+
154+
// A non-zero failed-test count must produce exit code 1, while flush and
// close are still each called once.
it('exits 1 when Vitest reports failed tests', async () => {
155+
const vitest = createVitestInstance({ failedTests: 2 });
156+
157+
mocks.createVitest.mockResolvedValue(vitest);
158+
159+
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:1');
160+
161+
expect(vitest.start).toHaveBeenCalledTimes(1);
162+
expect(mocks.flush).toHaveBeenCalledTimes(1);
163+
expect(vitest.close).toHaveBeenCalledTimes(1);
164+
});
165+
166+
// The module passes and no test is counted as failed, but the start result
// carries an unhandled error: expects exit code 1.
it('exits 1 when Vitest reports unhandled errors', async () => {
167+
const vitest = createVitestInstance({
168+
startResult: {
169+
testModules: [createTestModule()],
170+
unhandledErrors: [new Error('boom')],
171+
},
172+
});
173+
174+
mocks.createVitest.mockResolvedValue(vitest);
175+
176+
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:1');
177+
178+
expect(vitest.state.getCountOfFailedTests).toHaveBeenCalledTimes(1);
179+
expect(mocks.flush).toHaveBeenCalledTimes(1);
180+
expect(vitest.close).toHaveBeenCalledTimes(1);
181+
});
182+
183+
// Same unhandled error as the previous case, but with
// `dangerouslyIgnoreUnhandledErrors` enabled: expects exit code 0.
it('exits 0 when unhandled errors are ignored by Vitest config', async () => {
184+
const vitest = createVitestInstance({
185+
dangerouslyIgnoreUnhandledErrors: true,
186+
startResult: {
187+
testModules: [createTestModule()],
188+
unhandledErrors: [new Error('boom')],
189+
},
190+
});
191+
192+
mocks.createVitest.mockResolvedValue(vitest);
193+
194+
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:0');
195+
196+
expect(vitest.state.getCountOfFailedTests).toHaveBeenCalledTimes(1);
197+
expect(mocks.flush).toHaveBeenCalledTimes(1);
198+
expect(vitest.close).toHaveBeenCalledTimes(1);
199+
});
200+
201+
// Failed-test count stays 0 and there are no unhandled errors, but the only
// module reports state 'failed' / ok() false: expects exit code 1.
it('exits 1 when a test module fails outside failed test counting', async () => {
202+
const vitest = createVitestInstance({
203+
startResult: {
204+
testModules: [createTestModule('failed', false)],
205+
unhandledErrors: [],
206+
},
207+
});
208+
209+
mocks.createVitest.mockResolvedValue(vitest);
210+
211+
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:1');
212+
213+
expect(vitest.state.getCountOfFailedTests).toHaveBeenCalledTimes(1);
214+
});
215+
216+
// With `list: true` the run must collect once, never start execution, skip
// console shortcuts and flush, and exit with code 0.
it('keeps list mode exiting 0 without starting tests', async () => {
217+
const vitest = createVitestInstance();
218+
219+
mocks.createVitest.mockResolvedValue(vitest);
220+
221+
await expect(runVitest('.', { ...baseOptions, list: true })).rejects.toThrow('process.exit:0');
222+
223+
expect(vitest.collect).toHaveBeenCalledTimes(1);
224+
expect(vitest.start).not.toHaveBeenCalled();
225+
expect(mocks.registerConsoleShortcuts).not.toHaveBeenCalled();
226+
expect(mocks.flush).not.toHaveBeenCalled();
227+
});
228+
229+
// Simulates a validation abort: the mocked `start()` writes the abort file
// and otherwise reports a clean result. Expects exit code 1, no console
// shortcuts, no flush, and exactly one `close()`.
it('preserves validation abort failures with exit code 1', async () => {
230+
const vitest = createVitestInstance({
231+
startResult: {
232+
testModules: [createTestModule()],
233+
unhandledErrors: [],
234+
},
235+
});
236+
237+
mocks.createVitest.mockResolvedValue(vitest);
238+
// NOTE(review): relies on AXIOM_ABORT_FILE being set before `start()` runs,
// presumably by `runVitest` itself - confirm against its source.
vitest.start.mockImplementation(async () => {
239+
writeFileSync(process.env.AXIOM_ABORT_FILE!, 'validation failed', 'utf8');
240+
return {
241+
testModules: [createTestModule()],
242+
unhandledErrors: [],
243+
};
244+
});
245+
246+
await expect(runVitest('.', baseOptions)).rejects.toThrow('process.exit:1');
247+
248+
expect(mocks.registerConsoleShortcuts).not.toHaveBeenCalled();
249+
expect(mocks.flush).not.toHaveBeenCalled();
250+
expect(vitest.close).toHaveBeenCalledTimes(1);
251+
});
252+
});

0 commit comments

Comments
 (0)