-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathassertions.test.ts
More file actions
137 lines (122 loc) · 4.03 KB
/
assertions.test.ts
File metadata and controls
137 lines (122 loc) · 4.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import { mkdtemp, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import path from "node:path";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { evaluateAssertions } from "../../agent/assertions.js";
import type { AgentAssertion } from "../../agent/types.js";
/** Scratch directory passed to `evaluateAssertions`; reassigned before every test. */
let workspaceDir: string;

// Create a fresh, uniquely named temp directory for each test.
beforeEach(async () => {
  const prefix = path.join(tmpdir(), "eval-assert-");
  workspaceDir = await mkdtemp(prefix);
});

// Delete the per-test directory and everything inside it. `force` makes the
// removal tolerate a path that no longer exists.
afterEach(() => rm(workspaceDir, { recursive: true, force: true }));
/**
 * Behavioural tests for `evaluateAssertions`, covering the `contains`,
 * `not_contains`, `matches` and `script` assertion types plus one mixed list.
 * Every case is evaluated against the per-test `workspaceDir`.
 */
describe("evaluateAssertions", () => {
  type EvaluateArgs = Parameters<typeof evaluateAssertions>;

  /** Evaluates `assertions` against `text` using the current test's workspace. */
  const evaluate = (text: EvaluateArgs[0], assertions: EvaluateArgs[1]) =>
    evaluateAssertions(text, assertions, workspaceDir);

  // --- contains ------------------------------------------------------------

  it("contains: passes when text includes value", async () => {
    const results = await evaluate("hello world", [
      { type: "contains", value: "hello" },
    ]);

    // One assertion in, exactly one result out.
    expect(results).toHaveLength(1);
    expect(results[0]?.passed).toBe(true);
  });

  it("contains: fails when text does not include value", async () => {
    const [outcome] = await evaluate("hello world", [
      { type: "contains", value: "goodbye" },
    ]);

    expect(outcome?.passed).toBe(false);
  });

  // --- not_contains --------------------------------------------------------

  it("not_contains: passes when text does not include value", async () => {
    const [outcome] = await evaluate("hello world", [
      { type: "not_contains", value: "goodbye" },
    ]);

    expect(outcome?.passed).toBe(true);
  });

  it("not_contains: fails when text includes value", async () => {
    const [outcome] = await evaluate("hello world", [
      { type: "not_contains", value: "hello" },
    ]);

    expect(outcome?.passed).toBe(false);
  });

  // --- matches -------------------------------------------------------------

  it("matches: passes with matching regex", async () => {
    const [outcome] = await evaluate("Error code: 404", [
      { type: "matches", pattern: "\\d{3}" },
    ]);

    expect(outcome?.passed).toBe(true);
  });

  it("matches: supports flags", async () => {
    // The pattern is lower-case while the text is capitalised, so this only
    // passes if the "i" flag is honoured.
    const [outcome] = await evaluate("Hello World", [
      { type: "matches", pattern: "hello", flags: "i" },
    ]);

    expect(outcome?.passed).toBe(true);
  });

  it("matches: fails when pattern does not match", async () => {
    const [outcome] = await evaluate("hello", [
      { type: "matches", pattern: "^\\d+$" },
    ]);

    expect(outcome?.passed).toBe(false);
  });

  // --- script --------------------------------------------------------------
  // NOTE(review): the commands below use shell syntax (`exit`, `test -f`,
  // `>&2`); presumably they are executed through a POSIX-style shell — confirm
  // against the implementation before running on other platforms.

  it("script: passes when command exits 0", async () => {
    const [outcome] = await evaluate("", [
      { type: "script", command: "exit 0", name: "trivial" },
    ]);

    expect(outcome?.passed).toBe(true);
  });

  it("script: fails when command exits non-zero", async () => {
    const [outcome] = await evaluate("", [
      { type: "script", command: "exit 1", name: "failing" },
    ]);

    expect(outcome?.passed).toBe(false);
  });

  it("script: runs in workspace directory", async () => {
    // The command refers to the marker by a relative path, so it can only be
    // found when the script's working directory is the workspace.
    const markerPath = path.join(workspaceDir, "marker.txt");
    await writeFile(markerPath, "found");

    const [outcome] = await evaluate("", [
      { type: "script", command: "test -f marker.txt", name: "cwd-check" },
    ]);

    expect(outcome?.passed).toBe(true);
  });

  it("script: captures stderr in failure message", async () => {
    const [outcome] = await evaluate("", [
      {
        type: "script",
        command: "echo 'bad stuff' >&2; exit 1",
        name: "stderr-capture",
      },
    ]);

    expect(outcome?.passed).toBe(false);
    expect(outcome?.message).toContain("bad stuff");
  });

  // --- mixed list ----------------------------------------------------------

  it("evaluates multiple assertions in order", async () => {
    const assertions: AgentAssertion[] = [
      { type: "contains", value: "dub" },
      { type: "not_contains", value: "error" },
      { type: "script", command: "exit 0", name: "ok" },
    ];

    const results = await evaluate("using dub sdk", assertions);

    // NOTE(review): this verifies the result count and that every assertion
    // passed; it does not by itself distinguish one ordering from another.
    expect(results).toHaveLength(3);
    expect(results.every((r) => r.passed)).toBe(true);
  });
});