Skip to content

Commit cc51343

Browse files
committed
feat(create-kernel-app): Add Gemini CUA template
Co-authored-by: null <>
1 parent 7a08da2 commit cc51343

File tree

6 files changed

+265
-2
lines changed

6 files changed

+265
-2
lines changed

index.ts

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ type TemplateKey =
2020
| "advanced-sample"
2121
| "computer-use"
2222
| "cua"
23-
| "magnitude";
23+
| "magnitude"
24+
| "gemini-cua";
2425
type LanguageInfo = { name: string; shorthand: string };
2526
type TemplateInfo = {
2627
name: string;
@@ -38,6 +39,7 @@ const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
3839
const TEMPLATE_COMPUTER_USE = "computer-use";
3940
const TEMPLATE_CUA = "cua";
4041
const TEMPLATE_MAGNITUDE = "magnitude";
42+
const TEMPLATE_GEMINI_CUA = "gemini-cua";
4143
const LANGUAGE_SHORTHAND_TS = "ts";
4244
const LANGUAGE_SHORTHAND_PY = "py";
4345

@@ -86,6 +88,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
8688
description: "Implements the Magnitude.run SDK",
8789
languages: [LANGUAGE_TYPESCRIPT],
8890
},
91+
[TEMPLATE_GEMINI_CUA]: {
92+
name: "Gemini Computer Use",
93+
description: "Implements Gemini 2.5 Computer Use Agent with Stagehand",
94+
languages: [LANGUAGE_TYPESCRIPT],
95+
},
8996
};
9097

9198
const INVOKE_SAMPLES: Record<
@@ -104,6 +111,8 @@ const INVOKE_SAMPLES: Record<
104111
'kernel invoke ts-cua cua-task --payload \'{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
105112
[TEMPLATE_MAGNITUDE]:
106113
'kernel invoke ts-magnitude mag-url-extract --payload \'{"url": "https://en.wikipedia.org/wiki/Special:Random"}\'',
114+
[TEMPLATE_GEMINI_CUA]:
115+
'kernel invoke ts-gemini-cua gemini-cua-task',
107116
},
108117
[LANGUAGE_PYTHON]: {
109118
[TEMPLATE_SAMPLE_APP]:
@@ -130,6 +139,7 @@ const REGISTERED_APP_NAMES: Record<
130139
[TEMPLATE_COMPUTER_USE]: "ts-cu",
131140
[TEMPLATE_CUA]: "ts-cua",
132141
[TEMPLATE_MAGNITUDE]: "ts-magnitude",
142+
[TEMPLATE_GEMINI_CUA]: "ts-gemini-cua",
133143
},
134144
[LANGUAGE_PYTHON]: {
135145
[TEMPLATE_SAMPLE_APP]: "python-basic",
@@ -372,6 +382,8 @@ function printNextSteps(
372382
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
373383
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
374384
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
385+
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_GEMINI_CUA
386+
? "kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX"
375387
: language === LANGUAGE_PYTHON &&
376388
(template === TEMPLATE_SAMPLE_APP ||
377389
template === TEMPLATE_ADVANCED_SAMPLE)
@@ -415,7 +427,7 @@ program
415427
)
416428
.option(
417429
"-t, --template <template>",
418-
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE})`
430+
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE}, ${TEMPLATE_GEMINI_CUA})`
419431
)
420432
.action(
421433
async (
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Kernel TypeScript SDK + Stagehand + Gemini Computer Use Agent
2+
3+
A Kernel application that demonstrates Computer Use Agent (CUA) capabilities using Google's Gemini 2.5 model with Stagehand for browser automation.
4+
5+
## What It Does
6+
7+
This app uses the [Gemini 2.5 Computer Use model](https://blog.google/technology/google-deepmind/gemini-computer-use-model/) to autonomously navigate websites and complete tasks. The example task finds Kernel's company page on the YCombinator website and writes a blog post about their product offering.
8+
9+
## Setup
10+
11+
1. **Add your API keys as environment variables:**
12+
- `KERNEL_API_KEY` - Get from [Kernel dashboard](https://dashboard.onkernel.com/sign-in)
13+
- `GOOGLE_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/apikey)
14+
- `OPENAI_API_KEY` - Get from [OpenAI platform](https://platform.openai.com/api-keys)
15+
16+
## Running Locally
17+
18+
Execute the script directly with tsx:
19+
20+
```bash
21+
npx tsx index.ts
22+
```
23+
24+
This runs the agent without a Kernel invocation context and provides the browser live view URL for debugging.
25+
26+
## Deploying to Kernel
27+
28+
1. **Deploy the application:**
29+
```bash
30+
kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX
31+
```
32+
33+
2. **Invoke the action:**
34+
```bash
35+
kernel invoke ts-gemini-cua gemini-cua-task
36+
```
37+
38+
The action creates a Kernel-managed browser and associates it with the invocation for tracking and monitoring.
39+
40+
## Documentation
41+
42+
- [Kernel Documentation](https://docs.onkernel.com/quickstart)
43+
- [Kernel Stagehand Guide](https://www.onkernel.com/docs/integrations/stagehand)
44+
- [Gemini 2.5 Computer Use](https://blog.google/technology/google-deepmind/gemini-computer-use-model/)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Dependencies
2+
node_modules/
3+
package-lock.json
4+
5+
# TypeScript
6+
*.tsbuildinfo
7+
dist/
8+
build/
9+
10+
# Environment
11+
.env
12+
.env.local
13+
.env.*.local
14+
15+
# IDE
16+
.vscode/
17+
.idea/
18+
*.swp
19+
*.swo
20+
21+
# OS
22+
.DS_Store
23+
Thumbs.db
24+
25+
# Logs
26+
logs/
27+
*.log
28+
npm-debug.log*
29+
yarn-debug.log*
30+
yarn-error.log*
31+
32+
# Testing
33+
coverage/
34+
.nyc_output/
35+
36+
# Misc
37+
.cache/
38+
.temp/
39+
.tmp/
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import { pathToFileURL } from "node:url";

import { Stagehand } from "@browserbasehq/stagehand";
import client, { Kernel, type KernelContext } from '@onkernel/sdk';
3+
4+
// Kernel SDK client, authenticated with the KERNEL_API_KEY environment variable.
const kernel = new Kernel({ apiKey: process.env.KERNEL_API_KEY });

// Kernel app under which this file's action is registered.
const app = kernel.app('ts-gemini-cua');

/** Outcome of one agent run, returned by the action handler and by local runs. */
interface SearchQueryOutput {
  /** Whether the agent run completed without throwing. */
  success: boolean;
  /** Final message reported by the agent; empty when the run failed. */
  result: string;
}

/**
 * Reads a required environment variable and fails fast at module load when it
 * is missing or empty.
 *
 * @param name - Name of the environment variable to read.
 * @returns The variable's non-empty value.
 * @throws Error with the message `<name> is not set` when the variable is absent or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (value) {
    return value;
  }
  throw new Error(`${name} is not set`);
}

// LLM provider credentials:
// - OPENAI_API_KEY: used by Stagehand's GPT-4o model
// - GOOGLE_API_KEY: used by the Gemini 2.5 Computer Use Agent
// Provide them as environment variables or with `kernel deploy <filename> --env-file .env`.
// See https://docs.onkernel.com/launch/deploy#environment-variables
const OPENAI_API_KEY = requireEnv('OPENAI_API_KEY');
const GOOGLE_API_KEY = requireEnv('GOOGLE_API_KEY');
30+
31+
/**
 * Runs the sample Computer Use Agent (CUA) task with Gemini 2.5 and Stagehand.
 *
 * Creates a Kernel browser, connects Stagehand to it over CDP, opens the
 * YCombinator companies page and lets the Gemini agent work on the instruction.
 * The function supports two execution modes:
 * - Action handler mode: called with the invocation ID from the Kernel action
 *   context, which is passed to the browser as `invocation_id`.
 * - Local mode: called without an ID when the script is executed directly.
 *
 * The Stagehand session and the Kernel browser are torn down on every exit
 * path, including failures during Stagehand initialization.
 *
 * @param invocationId - Optional Kernel invocation ID to associate the browser with.
 * @returns `{ success: true, result }` carrying the agent's final message, or
 *   `{ success: false, result: "" }` when Stagehand or the agent throws.
 * @throws Rejects when the Kernel browser cannot be created or deleted.
 */
async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutput> {
  const browserOptions = invocationId
    ? { invocation_id: invocationId, stealth: true }
    : { stealth: true };

  const kernelBrowser = await kernel.browsers.create(browserOptions);

  console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);

  const stagehand = new Stagehand({
    env: "LOCAL",
    verbose: 1,
    domSettleTimeoutMs: 30_000,
    modelName: "gpt-4o",
    modelClientOptions: {
      apiKey: OPENAI_API_KEY
    },
    localBrowserLaunchOptions: {
      cdpUrl: kernelBrowser.cdp_ws_url
    }
  });

  try {
    // Initialize inside the try block so a failed init still reaches the
    // cleanup in `finally` instead of leaking the Kernel browser.
    await stagehand.init();

    /////////////////////////////////////
    // Your Stagehand implementation here
    /////////////////////////////////////
    const page = stagehand.page;

    // Navigate to YCombinator's website before creating the agent, so that
    // page.url() in the agent instructions names the page the agent starts on.
    await page.goto("https://www.ycombinator.com/companies");

    const agent = stagehand.agent({
      provider: "google",
      model: "gemini-2.5-computer-use-preview-10-2025",
      instructions: [
        "You are a helpful assistant that can use a web browser.",
        `You are currently on the following page: ${page.url()}.`,
        "Do not ask follow up questions, the user will trust your judgement.",
      ].join("\n"),
      options: {
        apiKey: GOOGLE_API_KEY,
      }
    });

    // Define the instructions for the CUA agent
    const instruction = "Find Kernel's company page on the YCombinator website and write a blog post about their product offering.";

    // Execute the instruction
    const result = await agent.execute({
      instruction,
      maxSteps: 20,
    });

    console.log("result: ", result);

    return { success: true, result: result.message };
  } catch (error) {
    console.error(error);
    return { success: false, result: "" };
  } finally {
    console.log("Deleting browser and closing stagehand...");
    try {
      await stagehand.close();
    } catch (closeError) {
      // A failed close (for example after an unsuccessful init) must not
      // prevent the Kernel browser from being deleted below.
      console.error(closeError);
    }
    await kernel.browsers.deleteByID(kernelBrowser.session_id);
  }
}
111+
112+
// Remote entry point: exposes the task as the `gemini-cua-task` action.
// Invoke it with: kernel invoke ts-gemini-cua gemini-cua-task
const handleGeminiCuaTask = async (ctx: KernelContext): Promise<SearchQueryOutput> => {
  // Hand the invocation ID to the task, which passes it to the browser it creates.
  const { invocation_id: invocationId } = ctx;
  return runStagehandTask(invocationId);
};

app.action<void, SearchQueryOutput>('gemini-cua-task', handleGeminiCuaTask);
120+
121+
// Run locally if executed directly (not imported as a module)
// Execute via: npx tsx index.ts
//
// The script path is converted with pathToFileURL() instead of being appended
// to "file://" by hand: import.meta.url is a percent-encoded file URL, so the
// hand-built string does not match for paths containing spaces or other
// characters that need encoding, nor for Windows paths.
const entryPoint = process.argv[1];
if (entryPoint !== undefined && import.meta.url === pathToFileURL(entryPoint).href) {
  runStagehandTask()
    .then((result) => {
      console.log('Local execution result:', result);
      // Use the exit code to report whether the agent run succeeded.
      process.exit(result.success ? 0 : 1);
    })
    .catch((error: unknown) => {
      console.error('Local execution failed:', error);
      process.exit(1);
    });
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "ts-gemini-cua",
3+
"module": "index.ts",
4+
"type": "module",
5+
"private": true,
6+
"peerDependencies": {
7+
"typescript": "^5"
8+
},
9+
"dependencies": {
10+
"@browserbasehq/stagehand": "^2.5.2",
11+
"@onkernel/sdk": "^0.14.0",
12+
"zod": "^3.25.7"
13+
}
14+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"compilerOptions": {
3+
"lib": ["ESNext", "DOM"],
4+
"target": "ESNext",
5+
"module": "ESNext",
6+
"moduleDetection": "force",
7+
"jsx": "react-jsx",
8+
"allowJs": true,
9+
"moduleResolution": "bundler",
10+
"allowImportingTsExtensions": true,
11+
"verbatimModuleSyntax": true,
12+
"noEmit": true,
13+
"strict": true,
14+
"skipLibCheck": true,
15+
"noFallthroughCasesInSwitch": true,
16+
"noUncheckedIndexedAccess": true,
17+
"noUnusedLocals": false,
18+
"noUnusedParameters": false,
19+
"noPropertyAccessFromIndexSignature": false
20+
},
21+
"include": ["./**/*.ts", "./**/*.tsx"],
22+
"exclude": ["node_modules", "dist"]
23+
}

0 commit comments

Comments
 (0)