Skip to content

Commit 27dbcca

Browse files
author
raidendotai
committed
ts-cua sample added
1 parent 37f7f2c commit 27dbcca

File tree

16 files changed

+895
-1
lines changed

16 files changed

+895
-1
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ create-kernel-app [app-name] [options]
4747
- `stagehand`: Template with Stagehand SDK (Typescript only)
4848
- `advanced-sample`: Implements sample apps using advanced Kernel configs
4949
- `computer-use`: Implements a prompt loop using Anthropic Computer Use
50+
- `cua-sample`: Implements an OpenAI Computer Using Agent (CUA) sample (Typescript only)
5051

5152
### Examples
5253

@@ -121,6 +122,9 @@ kernel invoke python-basic get-page-title --payload '{"url": "https://www.google
121122

122123
# Python + Browser Use
123124
kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
125+
126+
# Typescript + CUA Sample
127+
kernel invoke ts-cua agent-run --payload '{"query": "open hackernews and get the top 5 articles"}'
124128
```
125129

126130
## Sample apps reference
@@ -134,6 +138,7 @@ These are the sample apps currently available when you run `npx @onkernel/create
134138
| **stagehand** | Returns the first result of a specified Google search | Stagehand | `{ query }` |
135139
| **advanced-sample** | Implements sample apps using advanced Kernel configs | n/a |
136140
| **computer-use** | Implements a prompt loop | Anthropic Computer Use API | `{ query }` |
141+
| **cua-sample** | Implements the OpenAI Computer Using Agent (CUA) | OpenAI CUA | `{ query }` |
137142

138143
## Documentation
139144

index.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ type TemplateKey =
1818
| "browser-use"
1919
| "stagehand"
2020
| "advanced-sample"
21-
| "computer-use";
21+
| "computer-use"
22+
| "cua-sample";
2223
type LanguageInfo = { name: string; shorthand: string };
2324
type TemplateInfo = {
2425
name: string;
@@ -34,6 +35,7 @@ const TEMPLATE_BROWSER_USE = "browser-use";
3435
const TEMPLATE_STAGEHAND = "stagehand";
3536
const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
3637
const TEMPLATE_COMPUTER_USE = "computer-use";
38+
const TEMPLATE_CUA_SAMPLE = "cua-sample";
3739
const LANGUAGE_SHORTHAND_TS = "ts";
3840
const LANGUAGE_SHORTHAND_PY = "py";
3941

@@ -73,6 +75,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
7375
description: "Implements the Anthropic Computer Use SDK",
7476
languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
7577
},
78+
[TEMPLATE_CUA_SAMPLE]: {
79+
name: "CUA Sample",
80+
description: "Implements a Computer Use Agent (OpenAI CUA) sample",
81+
languages: [LANGUAGE_TYPESCRIPT],
82+
},
7683
};
7784

7885
const INVOKE_SAMPLES: Record<
@@ -88,6 +95,8 @@ const INVOKE_SAMPLES: Record<
8895
'kernel invoke ts-advanced test-captcha-solver',
8996
[TEMPLATE_COMPUTER_USE]:
9097
'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
98+
// The action name must match the one the ts-cua template registers ("agent-run").
[TEMPLATE_CUA_SAMPLE]:
  'kernel invoke ts-cua agent-run --payload \'{"query": "open hackernews and get the top 5 articles"}\'',
91100
},
92101
[LANGUAGE_PYTHON]: {
93102
[TEMPLATE_SAMPLE_APP]:
@@ -114,6 +123,8 @@ const REGISTERED_APP_NAMES: Record<
114123
'ts-advanced',
115124
[TEMPLATE_COMPUTER_USE]:
116125
'ts-cu',
126+
[TEMPLATE_CUA_SAMPLE]:
127+
'ts-cua',
117128
},
118129
[LANGUAGE_PYTHON]: {
119130
[TEMPLATE_SAMPLE_APP]:
@@ -354,6 +365,8 @@ function printNextSteps(
354365
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
355366
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
356367
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
368+
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA_SAMPLE
369+
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
357370
: language === LANGUAGE_PYTHON && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_ADVANCED_SAMPLE)
358371
? "kernel deploy main.py"
359372
: language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
node_modules
2+
bun.lockb
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
node_modules
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"tabWidth": 1,
3+
"useTabs": true
4+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Kernel Typescript Sample App - CUA
2+
3+
This is a Kernel application that demonstrates using the Computer Using Agent (CUA) from OpenAI.
4+
5+
It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
6+
It also uses the latest OpenAI SDK format.
7+
8+
See the [docs](https://docs.onkernel.com/quickstart) for information.
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
// @ts-nocheck
2+
3+
import "dotenv/config";
4+
import { Kernel, type KernelContext } from "@onkernel/sdk";
5+
import { chromium } from "playwright";
6+
import { Agent } from "./lib/agent";
7+
import computers from "./lib/computers";
8+
9+
const kernel = new Kernel();
10+
const app = kernel.app("ts-cua");
11+
12+
// LLM API Keys are set in the environment during `kernel deploy <filename> -e OPENAI_API_KEY=XXX`
13+
// See https://docs.onkernel.com/launch/deploy#environment-variables
14+
if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set');
15+
16+
/**
17+
* Example app that runs an agent using OpenAI CUA
18+
* Args:
19+
* ctx: Kernel context containing invocation information
20+
* payload: An object with a `query` property
21+
* Returns:
22+
* An answer to the query, elapsed time and optionally the messages stack
23+
* Invoke this via CLI:
24+
* export KERNEL_API_KEY=<your_api_key>
25+
* kernel deploy index.ts -e OPENAI_API_KEY=XXXXX --force
26+
* kernel invoke ts-cua agent-run -p "{\"query\":\"current market price range for a used dreamcast\"}"
27+
* kernel logs ts-cua -f # Open in separate tab
28+
*/
29+
30+
/** Payload accepted by the `agent-run` action. */
interface CuaInput {
  /** Natural-language task or question for the agent. */
  query: string;
}

/** Result returned by the `agent-run` action. */
interface CuaOutput {
  /** Duration of the run in seconds, rounded to two decimals. */
  elapsed: number;
  /** Full message stack of the run; optional because the action omits it by default. */
  response?: Array<object>;
  /** Text of the agent's last message, or `null` when no text could be extracted. */
  answer: string | null;
}
39+
40+
// `agent-run`: creates a Kernel browser for this invocation, runs one full
// OpenAI CUA agent turn for `payload.query`, and returns the elapsed time in
// seconds together with the text of the agent's last message (or null).
// Throws an Error when `payload.query` is missing or blank.
app.action<CuaInput, CuaOutput>(
  "agent-run",
  async (ctx: KernelContext, payload?: CuaInput): Promise<CuaOutput> => {
    // `payload` is optional in the signature, so validate it up front rather
    // than failing on `payload.query` after a browser has already been created.
    const query = payload?.query;
    if (typeof query !== "string" || query.trim() === "") {
      throw new Error("payload.query is required and must be a non-empty string");
    }

    const startTime = Date.now();
    const kernelBrowser = await kernel.browsers.create({
      invocation_id: ctx.invocation_id,
    });
    console.log(
      "> Kernel browser live view url: ",
      kernelBrowser.browser_live_view_url,
    );

    // Wrap the Kernel browser (reached over its CDP websocket) as a "computer".
    // NOTE(review): per the original comment, KernelPlaywrightComputer handles
    // browser cleanup internally, so nothing is closed here — confirm in ./lib/computers.
    const { computer } = await computers.create({
      type: "kernel",
      cdp_ws_url: kernelBrowser.cdp_ws_url,
    });

    const agent = new Agent(
      "computer-use-preview",
      computer,
      [], // additional tools
      (message: string) => {
        console.log(`> safety check: ${message}`);
        return true; // Auto-acknowledge all safety checks for testing
      },
    );

    // One Date instance so the ISO timestamp and the weekday describe the same instant.
    const now = new Date();
    const weekday = now.toLocaleDateString("en-US", { weekday: "long" });

    const response = await agent.runFullTurn(
      [
        {
          role: "system",
          content: `- Current date and time: ${now.toISOString()} (${weekday})`,
        },
        {
          type: "message",
          role: "user",
          content: [
            {
              type: "input_text",
              text: query,
            },
          ],
        },
      ],
      true, // print_steps
      true, // debug
      false, // show_images
    );

    console.log("> agent run done");

    const elapsedSeconds = (Date.now() - startTime) / 1000;

    return {
      // response, // full messages stack trace
      elapsed: parseFloat(elapsedSeconds.toFixed(2)),
      // First content part of the last message; null when that path does not exist.
      answer: response?.slice(-1)?.[0]?.content?.[0]?.text ?? null,
    };
  },
);

0 commit comments

Comments
 (0)