Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ type TemplateKey =
| "advanced-sample"
| "computer-use"
| "cua"
| "magnitude";
| "magnitude"
| "gemini-cua";
type LanguageInfo = { name: string; shorthand: string };
type TemplateInfo = {
name: string;
Expand All @@ -38,6 +39,7 @@ const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
const TEMPLATE_COMPUTER_USE = "computer-use";
const TEMPLATE_CUA = "cua";
const TEMPLATE_MAGNITUDE = "magnitude";
const TEMPLATE_GEMINI_CUA = "gemini-cua";
const LANGUAGE_SHORTHAND_TS = "ts";
const LANGUAGE_SHORTHAND_PY = "py";

Expand Down Expand Up @@ -86,6 +88,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
description: "Implements the Magnitude.run SDK",
languages: [LANGUAGE_TYPESCRIPT],
},
[TEMPLATE_GEMINI_CUA]: {
name: "Gemini Computer Use",
description: "Implements Gemini 2.5 Computer Use Agent",
languages: [LANGUAGE_TYPESCRIPT],
},
};

const INVOKE_SAMPLES: Record<
Expand All @@ -104,6 +111,8 @@ const INVOKE_SAMPLES: Record<
'kernel invoke ts-cua cua-task --payload \'{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
[TEMPLATE_MAGNITUDE]:
'kernel invoke ts-magnitude mag-url-extract --payload \'{"url": "https://en.wikipedia.org/wiki/Special:Random"}\'',
[TEMPLATE_GEMINI_CUA]:
'kernel invoke ts-gemini-cua gemini-cua-task',
},
[LANGUAGE_PYTHON]: {
[TEMPLATE_SAMPLE_APP]:
Expand All @@ -130,6 +139,7 @@ const REGISTERED_APP_NAMES: Record<
[TEMPLATE_COMPUTER_USE]: "ts-cu",
[TEMPLATE_CUA]: "ts-cua",
[TEMPLATE_MAGNITUDE]: "ts-magnitude",
[TEMPLATE_GEMINI_CUA]: "ts-gemini-cua",
},
[LANGUAGE_PYTHON]: {
[TEMPLATE_SAMPLE_APP]: "python-basic",
Expand Down Expand Up @@ -372,6 +382,8 @@ function printNextSteps(
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_GEMINI_CUA
? "kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX"
: language === LANGUAGE_PYTHON &&
(template === TEMPLATE_SAMPLE_APP ||
template === TEMPLATE_ADVANCED_SAMPLE)
Expand Down Expand Up @@ -415,7 +427,7 @@ program
)
.option(
"-t, --template <template>",
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE})`
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_ADVANCED_SAMPLE}, ${TEMPLATE_COMPUTER_USE}, ${TEMPLATE_CUA}, ${TEMPLATE_MAGNITUDE}, ${TEMPLATE_GEMINI_CUA})`
)
.action(
async (
Expand Down
44 changes: 44 additions & 0 deletions templates/typescript/gemini-cua/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Kernel TypeScript SDK + Stagehand + Gemini Computer Use Agent

A Kernel application that demonstrates Computer Use Agent (CUA) capabilities using Google's Gemini 2.5 model with Stagehand for browser automation.

## What It Does

This app uses Google's [Gemini 2.5 Computer Use model](https://blog.google/technology/google-deepmind/gemini-computer-use-model/) to autonomously navigate websites and complete tasks. The example task finds Kernel's company page on the Y Combinator website and writes a blog post about its product offering.

## Setup

1. **Add your API keys as environment variables:**
- `KERNEL_API_KEY` - Get from [Kernel dashboard](https://dashboard.onkernel.com/sign-in)
- `GOOGLE_API_KEY` - Get from [Google AI Studio](https://aistudio.google.com/apikey)
- `OPENAI_API_KEY` - Get from [OpenAI platform](https://platform.openai.com/api-keys)

## Running Locally

Execute the script directly with tsx:

```bash
npx tsx index.ts
```

This runs the agent without a Kernel invocation context and prints the browser live view URL to the console for debugging.

## Deploying to Kernel

1. **Deploy the application:**
```bash
kernel deploy index.ts --env GOOGLE_API_KEY=XXX --env OPENAI_API_KEY=XXX
```

2. **Invoke the action:**
```bash
kernel invoke ts-gemini-cua gemini-cua-task
```

The action creates a Kernel-managed browser and associates it with the invocation for tracking and monitoring.

## Documentation

- [Kernel Documentation](https://docs.onkernel.com/quickstart)
- [Kernel Stagehand Guide](https://www.onkernel.com/docs/integrations/stagehand)
- [Gemini 2.5 Computer Use](https://blog.google/technology/google-deepmind/gemini-computer-use-model/)
39 changes: 39 additions & 0 deletions templates/typescript/gemini-cua/_gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Dependencies
node_modules/
package-lock.json

# TypeScript
*.tsbuildinfo
dist/
build/

# Environment
.env
.env.local
.env.*.local

# IDE
.vscode/
.idea/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# Testing
coverage/
.nyc_output/

# Misc
.cache/
.temp/
.tmp/
125 changes: 125 additions & 0 deletions templates/typescript/gemini-cua/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { pathToFileURL } from "node:url";

import { Stagehand } from "@browserbasehq/stagehand";
import { Kernel, type KernelContext } from '@onkernel/sdk';

// Kernel API client; the API key is taken from the KERNEL_API_KEY environment variable.
const kernel = new Kernel({ apiKey: process.env.KERNEL_API_KEY });

// Kernel app named "ts-gemini-cua"; this file's action is registered on it.
const app = kernel.app("ts-gemini-cua");

/**
 * Outcome of one agent run, as returned by the Kernel action and the local runner.
 */
type SearchQueryOutput = {
  /** `true` when the agent task finished without throwing. */
  success: boolean;
  /** The agent's final message; an empty string when the run failed. */
  result: string;
  /** Message of the error that aborted the run; only set on failure. */
  error?: string;
};

// LLM provider credentials, read from the process environment:
//   - GOOGLE_API_KEY: used by the Gemini 2.5 Computer Use agent
//   - OPENAI_API_KEY: used by Stagehand's GPT-4o model
// Supply them as environment variables or with `kernel deploy <filename> --env-file .env`.
// See https://docs.onkernel.com/launch/deploy#environment-variables
const { GOOGLE_API_KEY, OPENAI_API_KEY } = process.env;

// Fail fast at module load when a key is missing or empty (OpenAI is checked first).
if (OPENAI_API_KEY === undefined || OPENAI_API_KEY === "") {
  throw new Error("OPENAI_API_KEY is not set");
}

if (GOOGLE_API_KEY === undefined || GOOGLE_API_KEY === "") {
  throw new Error("GOOGLE_API_KEY is not set");
}

/**
 * Executes the sample Computer Use Agent (CUA) task using Gemini 2.5 and Stagehand.
 *
 * Creates a Kernel browser, connects Stagehand to it over CDP, navigates to the
 * Y Combinator company directory and lets the Gemini agent carry out the
 * instruction (at most 20 steps).
 *
 * @param invocationId - Kernel invocation ID to attach to the browser; omit when
 *   running outside a Kernel invocation.
 * @returns `{ success: true, result }` with the agent's final message, or
 *   `{ success: false, result: "", error }` when the task itself throws.
 * @throws Errors from browser creation, Stagehand initialisation, or cleanup
 *   are not converted into a result and propagate to the caller.
 */
async function runStagehandTask(invocationId?: string): Promise<SearchQueryOutput> {
  const browserOptions = {
    stealth: true,
    // viewport: {
    //   width: 1440,
    //   height: 900,
    //   refresh_rate: 25
    // },
    ...(invocationId && { invocation_id: invocationId })
  };

  const kernelBrowser = await kernel.browsers.create(browserOptions);

  // The remote browser now exists: whatever happens below (including a failed
  // Stagehand init or close), the outer `finally` deletes it so it cannot leak.
  try {
    console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);

    const stagehand = new Stagehand({
      env: "LOCAL",
      verbose: 1,
      domSettleTimeoutMs: 30_000,
      modelName: "gpt-4o",
      modelClientOptions: {
        apiKey: OPENAI_API_KEY
      },
      localBrowserLaunchOptions: {
        cdpUrl: kernelBrowser.cdp_ws_url
      }
    });
    await stagehand.init();

    /////////////////////////////////////
    // Your Stagehand implementation here
    /////////////////////////////////////
    try {
      const page = stagehand.page;

      // Navigate to YCombinator's website first, so that the URL interpolated
      // into the agent's instructions is the page the agent actually starts on.
      await page.goto("https://www.ycombinator.com/companies");

      const agent = stagehand.agent({
        provider: "google",
        model: "gemini-2.5-computer-use-preview-10-2025",
        instructions: [
          "You are a helpful assistant that can use a web browser.",
          `You are currently on the following page: ${page.url()}.`,
          "Do not ask follow up questions, the user will trust your judgement.",
        ].join("\n"),
        options: {
          apiKey: GOOGLE_API_KEY,
        }
      });

      // Define the instructions for the CUA agent
      const instruction = "Find Kernel's company page on the YCombinator website and write a blog post about their product offering.";

      // Execute the instruction
      const result = await agent.execute({
        instruction,
        maxSteps: 20,
      });

      console.log("result: ", result);

      return { success: true, result: result.message };
    } catch (error) {
      console.error(error);
      const errorMessage = error instanceof Error ? error.message : String(error);
      return { success: false, result: "", error: errorMessage };
    } finally {
      console.log("Deleting browser and closing stagehand...");
      // Only reached after a successful init, so close() acts on a live session.
      await stagehand.close();
    }
  } finally {
    await kernel.browsers.deleteByID(kernelBrowser.session_id);
  }
}

// Kernel action entry point for remote invocation.
// Trigger with: kernel invoke ts-gemini-cua gemini-cua-task
// The invocation ID from the context is forwarded to the task runner.
app.action<void, SearchQueryOutput>(
  "gemini-cua-task",
  async ({ invocation_id }: KernelContext) => runStagehandTask(invocation_id),
);

// Run locally if executed directly (not imported as a module)
// Execute via: npx tsx index.ts
//
// The entry script path is converted with pathToFileURL instead of being glued
// onto "file://": import.meta.url is a properly encoded file URL, so a plain
// string comparison never matches on Windows (drive letters, backslashes) or for
// paths containing characters that are percent-encoded in URLs (e.g. spaces).
const entryScript = process.argv[1];
if (entryScript !== undefined && import.meta.url === pathToFileURL(entryScript).href) {
  runStagehandTask().then(result => {
    console.log('Local execution result:', result);
    // Mirror the task outcome in the process exit code.
    process.exit(result.success ? 0 : 1);
  }).catch(error => {
    console.error('Local execution failed:', error);
    process.exit(1);
  });
}
14 changes: 14 additions & 0 deletions templates/typescript/gemini-cua/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"name": "ts-gemini-cua",
"module": "index.ts",
"type": "module",
"private": true,
"peerDependencies": {
"typescript": "^5"
},
"dependencies": {
"@browserbasehq/stagehand": "^2.5.2",
"@onkernel/sdk": "^0.15.0",
"zod": "^3.25.67"
}
}
23 changes: 23 additions & 0 deletions templates/typescript/gemini-cua/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"compilerOptions": {
"lib": ["ESNext", "DOM"],
"target": "ESNext",
"module": "ESNext",
"moduleDetection": "force",
"jsx": "react-jsx",
"allowJs": true,
"moduleResolution": "bundler",
"allowImportingTsExtensions": true,
"verbatimModuleSyntax": true,
"noEmit": true,
"strict": true,
"skipLibCheck": true,
"noFallthroughCasesInSwitch": true,
"noUncheckedIndexedAccess": true,
"noUnusedLocals": false,
"noUnusedParameters": false,
"noPropertyAccessFromIndexSignature": false
},
"include": ["./**/*.ts", "./**/*.tsx"],
"exclude": ["node_modules", "dist"]
}