-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Expand file tree
/
Copy pathgemini-cli-runner.ts
More file actions
210 lines (187 loc) · 6.12 KB
/
gemini-cli-runner.ts
File metadata and controls
210 lines (187 loc) · 6.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import { mkdirSync, writeFileSync, readFileSync } from "node:fs";
import path from "node:path";
import { InteractiveCLI, poll } from "./interactive-cli.js";
import { AgentTestRunner } from "./agent-test-runner.js";
import {
ParsedToolLog,
getToolName,
toolArgumentsMatch,
getToolArgumentsDebug,
} from "./tool-matcher.js";
import fs from "fs";
import { throwFailure } from "./logging.js";
// Passed to InteractiveCLI as its `readyPrompt` option by GeminiCliRunner.
// Presumably the text the Gemini CLI prints once it can accept input —
// TODO confirm against the CLI's actual prompt.
const READY_PROMPT = "Type your message";
/**
 * Loose shape of a single JSON record parsed out of the telemetry log file.
 * Every top-level field is optional because records of different kinds share
 * the same file.
 */
interface ParsedTelemetryLog {
  /**
   * Event attributes. A record counts as a tool call when "event.name" is
   * "gemini_cli.tool_call" and function_name is set.
   */
  attributes?: {
    "event.name"?: string;
    /** Name of the invoked tool. */
    function_name?: string;
    /** Tool arguments as a string (presumably serialized JSON — confirm). */
    function_args?: string;
    success?: boolean;
    duration_ms?: number;
  };
  /** Metric descriptors; not read by the code visible in this file. */
  scopeMetrics?: Array<{
    metrics: Array<{
      descriptor: { name: string };
    }>;
  }>;
}
/**
 * AgentTestRunner implementation that drives the `gemini` CLI through an
 * InteractiveCLI and verifies tool usage by reading the local telemetry log
 * the CLI is configured to write.
 */
export class GeminiCliRunner implements AgentTestRunner {
  private readonly cli: InteractiveCLI;
  private readonly telemetryPath: string;
  // Passed to poll() as its timeout (presumably milliseconds — confirm in
  // interactive-cli).
  private readonly telemetryTimeout = 15000;
  // Determines which tools to start from for this turn so we don't detect tool
  // calls from previous turns
  private turnToolIndex = 0;

  /**
   * Writes `<runDir>/.gemini/settings.json` (telemetry to a local file, the
   * firebase MCP server, auto-update disabled) and creates the InteractiveCLI
   * for `gemini --yolo` with `runDir` as its working directory.
   *
   * @param testName Name of the test; stored but not read by this class.
   * @param testDir Directory in which `telemetry.log` is written.
   * @param runDir Working directory for the CLI and home of `.gemini/`.
   */
  constructor(
    private readonly testName: string,
    testDir: string,
    runDir: string,
  ) {
    // Create a settings file to point the CLI to a local telemetry log
    this.telemetryPath = path.join(testDir, "telemetry.log");
    const settings = {
      general: {
        disableAutoUpdate: true,
      },
      telemetry: {
        enabled: true,
        target: "local",
        otlpEndpoint: "",
        outfile: this.telemetryPath,
      },
      mcpServers: {
        firebase: {
          command: "firebase",
          args: ["experimental:mcp"],
        },
      },
    };
    const geminiDir = path.join(runDir, ".gemini");
    mkdirSync(geminiDir, { recursive: true });
    writeFileSync(path.join(geminiDir, "settings.json"), JSON.stringify(settings, null, 2));

    this.cli = new InteractiveCLI("gemini", ["--yolo"], {
      cwd: runDir,
      readyPrompt: READY_PROMPT,
      showOutput: true,
    });
  }

  /** Delegates to the underlying CLI's ready-prompt wait. */
  async waitForReadyPrompt(): Promise<void> {
    return this.cli.waitForReadyPrompt();
  }

  /**
   * Sends `text` to the CLI. Before doing so, records how many tool calls are
   * already in the telemetry log so that expectToolCalls only considers calls
   * logged after this point.
   */
  async type(text: string): Promise<void> {
    this.turnToolIndex = this.readToolLogs().length;
    return this.cli.type(text);
  }

  /** Delegates to the underlying CLI's text expectation. */
  async expectText(text: string | RegExp): Promise<void> {
    return this.cli.expectText(text);
  }

  /** Kills the underlying CLI. */
  async exit(): Promise<void> {
    await this.cli.kill();
  }

  /**
   * Polls the telemetry log until, for every entry in `tools`, a tool call
   * with that name and matching arguments has been logged since the last
   * type() call. If the poll does not succeed, the collected mismatch messages
   * are handed to throwFailure.
   *
   * @param tools Expected tool definitions, interpreted by getToolName and
   *   toolArgumentsMatch.
   */
  async expectToolCalls(tools: string[]): Promise<void> {
    await this.waitForTelemetryReady();

    // We still need to poll because telemetry can take time to write each turn
    let messages: string[] = [];
    const success = await poll(() => {
      messages = [];
      // Start at this.turnToolIndex so we only read the tools used this turn
      const toolLogs = this.readToolLogs().slice(this.turnToolIndex);
      const foundToolNames = toolLogs.map((log) => log.name);

      for (const toolDef of tools) {
        const toolName = getToolName(toolDef);
        const sameNameLogs = toolLogs.filter((log) => log.name === toolName);
        if (sameNameLogs.length === 0) {
          messages.push(
            `Did not find expected tool call: "${toolName}" in the telemetry log. Found [${foundToolNames}]`,
          );
        } else if (!sameNameLogs.some((log) => toolArgumentsMatch(toolDef, log))) {
          messages.push(
            `Tool arguments matcher "${getToolArgumentsDebug(toolDef)}" for "${toolName}" did not match any tool results in the telemetry log. All tools are: [${JSON.stringify(toolLogs)}]`,
          );
        }
      }
      // One message is recorded per unmet expectation
      return messages.length === 0;
    }, this.telemetryTimeout);

    if (!success) {
      throwFailure(messages.join("\n"));
    }
  }

  /**
   * Waits until the telemetry file can be read and contains at least one
   * event. The poll result is intentionally ignored: expectToolCalls polls
   * again afterwards and reports any failure itself.
   */
  // Implementation for this is borrowed from the Gemini CLI's test-helper
  private async waitForTelemetryReady() {
    await poll(() => {
      try {
        // A missing or unreadable file throws and simply counts as "not ready"
        const content = readFileSync(this.telemetryPath, "utf-8");
        // Check if file has at least one event in it
        return content.includes('"event.name"');
      } catch {
        return false;
      }
    }, this.telemetryTimeout);
  }

  /**
   * Returns every "gemini_cli.tool_call" record in the telemetry log, in file
   * order, with defaults filled in for missing args/success/duration_ms.
   */
  // Implementation for this is borrowed from the Gemini CLI's test-helper
  private readToolLogs(): ParsedToolLog[] {
    const logs: ParsedToolLog[] = [];
    for (const logData of this.readAndParseTelemetryLog()) {
      const attributes = logData.attributes;
      // Look for tool call logs
      if (attributes?.function_name && attributes["event.name"] === "gemini_cli.tool_call") {
        logs.push({
          name: attributes.function_name,
          args: attributes.function_args ?? "{}",
          success: attributes.success ?? false,
          duration_ms: attributes.duration_ms ?? 0,
        });
      }
    }
    return logs;
  }

  /**
   * Reads the telemetry file and parses it into records. The file is a
   * sequence of JSON objects separated by a line break between "}" and "{".
   * Returns an empty array when the file does not exist yet; chunks that are
   * not valid JSON (for example a record still being written) and values that
   * are not objects are skipped.
   *
   * @throws Any read error other than the file being absent.
   */
  // Implementation for this is borrowed from the Gemini CLI's test-helper
  private readAndParseTelemetryLog(): ParsedTelemetryLog[] {
    const logFilePath = this.telemetryPath;
    if (!logFilePath) {
      return [];
    }

    let content: string;
    try {
      // Read directly instead of existsSync + read, so a file that disappears
      // between the two calls cannot cause an unexpected throw
      content = readFileSync(logFilePath, "utf-8");
    } catch (err: unknown) {
      if (GeminiCliRunner.isMissingFileError(err)) {
        return [];
      }
      throw err;
    }

    // Split the content into individual JSON objects. They are separated by
    // "}\n{"; "\r\n" is accepted too so CRLF-terminated logs parse correctly
    const chunks = content.split(/}\r?\n{/);
    const lastIndex = chunks.length - 1;
    const logs: ParsedTelemetryLog[] = [];

    chunks.forEach((chunk, index) => {
      // Add back the braces we removed during split
      const opening = index > 0 ? "{" : "";
      const closing = index < lastIndex ? "}" : "";
      const jsonStr = (opening + chunk + closing).trim();
      if (!jsonStr) {
        return;
      }
      try {
        const parsed: unknown = JSON.parse(jsonStr);
        // Only objects can carry attributes; this also keeps a top-level
        // `null` from crashing readToolLogs
        if (typeof parsed === "object" && parsed !== null) {
          logs.push(parsed as ParsedTelemetryLog);
        }
      } catch {
        // Skip objects that aren't valid JSON
      }
    });
    return logs;
  }

  /** True when `err` is a file-system error whose code is "ENOENT". */
  private static isMissingFileError(err: unknown): boolean {
    return (
      typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT"
    );
  }
}