Skip to content

Commit 04903b5

Browse files
authored
Implement Anthropic Computer Use as a sample app (#9)
* Replace xdotool and gnome-screenshot with playwright
1 parent c470fe1 commit 04903b5

File tree

18 files changed

+1184
-5
lines changed

18 files changed

+1184
-5
lines changed

README.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ create-kernel-app [app-name] [options]
3838
- `sample-app`: Basic template with Playwright integration
3939
- `browser-use`: Template with Browser Use SDK (Python only)
4040
- `stagehand`: Template with Stagehand SDK (Typescript only)
41+
- `persistent-browser`: Implements `sample-app` using a persistent browser
42+
- `computer-use`: Implements a prompt loop using Anthropic Computer Use
4143

4244
### Examples
4345

@@ -51,6 +53,11 @@ Create a Typescript application with Stagehand template:
5153
npx @onkernel/create-kernel-app my-app --language typescript --template stagehand
5254
```
5355

56+
Create a Typescript application with Computer Use template:
57+
```bash
58+
npx @onkernel/create-kernel-app my-app --language typescript --template computer-use
59+
```
60+
5461
Create a Python application with a sample app:
5562
```bash
5663
npx @onkernel/create-kernel-app my-app --language python --template sample-app
@@ -60,6 +67,7 @@ Create a Python application with Browser Use template:
6067
```bash
6168
npx @onkernel/create-kernel-app my-app --language python --template browser-use
6269
```
70+
```
6371
6472
## Next Steps
6573
@@ -82,7 +90,7 @@ export KERNEL_API_KEY=<YOUR_API_KEY>
8290
4. Deploy your application:
8391
```bash
8492
# Typescript
85-
kernel deploy index.ts # --env OPENAI_API_KEY=XXX if Stagehand
93+
kernel deploy index.ts # --env OPENAI_API_KEY=XXX if Stagehand; --env ANTHROPIC_API_KEY=XXX if Computer Use
8694

8795
# Python
8896
kernel deploy main.py # --env OPENAI_API_KEY=XXX if Browser Use
@@ -98,6 +106,9 @@ kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com
98106
# Typescript + Stagehand
99107
kernel invoke ts-stagehand stagehand-task --payload '{"query": "Best wired earbuds"}'
100108

109+
# Typescript + Computer Use
110+
kernel invoke ts-cu cu-task --payload '{"query": "Search for the top 3 restaurants in NYC according to Pete Wells"}'
111+
101112
# Python + Sample App
102113
kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}'
103114

@@ -114,6 +125,8 @@ These are the sample apps currently available when you run `npx @onkernel/create
114125
| **sample-app** | Returns the page title of a specified URL | Playwright | `{ url }` |
115126
| **browser-use** | Completes a specified task | Browser Use | `{ task }` |
116127
| **stagehand** | Returns the first result of a specified Google search | Stagehand | `{ query }` |
128+
| **persistent-browser** | Implements `sample-app` using a persistent browser | Playwright | `{ url }` |
129+
| **computer-use** | Implements a prompt loop | Anthropic Computer Use API | `{ query }` |
117130

118131
## Documentation
119132

index.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ type TemplateKey =
1717
| "sample-app"
1818
| "browser-use"
1919
| "stagehand"
20-
| "persistent-browser";
20+
| "persistent-browser"
21+
| "computer-use";
2122
type LanguageInfo = { name: string; shorthand: string };
2223
type TemplateInfo = {
2324
name: string;
@@ -32,6 +33,7 @@ const TEMPLATE_SAMPLE_APP = "sample-app";
3233
const TEMPLATE_BROWSER_USE = "browser-use";
3334
const TEMPLATE_STAGEHAND = "stagehand";
3435
const TEMPLATE_PERSISTENT_BROWSER = "persistent-browser";
36+
const TEMPLATE_COMPUTER_USE = "computer-use";
3537
const LANGUAGE_SHORTHAND_TS = "ts";
3638
const LANGUAGE_SHORTHAND_PY = "py";
3739

@@ -66,6 +68,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
6668
"Implements a persistent browser that maintains state across invocations",
6769
languages: [LANGUAGE_TYPESCRIPT],
6870
},
71+
[TEMPLATE_COMPUTER_USE]: {
72+
name: "Computer Use",
73+
description: "Implements the Anthropic Computer Use SDK",
74+
languages: [LANGUAGE_TYPESCRIPT],
75+
},
6976
};
7077

7178
const INVOKE_SAMPLES: Record<
@@ -79,6 +86,8 @@ const INVOKE_SAMPLES: Record<
7986
'kernel invoke ts-stagehand stagehand-task --payload \'{"query": "Best wired earbuds"}\'',
8087
[TEMPLATE_PERSISTENT_BROWSER]:
8188
'kernel invoke ts-persistent-browser persistent-browser-task --payload \'{"url": "https://news.ycombinator.com/"}\'',
89+
[TEMPLATE_COMPUTER_USE]:
90+
'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
8291
},
8392
[LANGUAGE_PYTHON]: {
8493
[TEMPLATE_SAMPLE_APP]:
@@ -299,10 +308,12 @@ function printNextSteps(
299308
): void {
300309
// Determine which sample command to show based on language and template
301310
const deployCommand =
302-
language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_SAMPLE_APP
311+
language === LANGUAGE_TYPESCRIPT && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_PERSISTENT_BROWSER)
303312
? "kernel deploy index.ts"
304313
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_STAGEHAND
305314
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
315+
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
316+
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
306317
: language === LANGUAGE_PYTHON && template === TEMPLATE_SAMPLE_APP
307318
? "kernel deploy main.py"
308319
: language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE
@@ -341,7 +352,7 @@ program
341352
)
342353
.option(
343354
"-t, --template <template>",
344-
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND})`
355+
`Template type (${TEMPLATE_SAMPLE_APP}, ${TEMPLATE_BROWSER_USE}, ${TEMPLATE_STAGEHAND}, ${TEMPLATE_PERSISTENT_BROWSER}, ${TEMPLATE_COMPUTER_USE})`
345356
)
346357
.action(
347358
async (
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Dependencies
2+
node_modules/
3+
package-lock.json
4+
5+
# TypeScript
6+
*.tsbuildinfo
7+
dist/
8+
build/
9+
10+
# Environment
11+
.env
12+
.env.local
13+
.env.*.local
14+
15+
# IDE
16+
.vscode/
17+
.idea/
18+
*.swp
19+
*.swo
20+
21+
# OS
22+
.DS_Store
23+
Thumbs.db
24+
25+
# Logs
26+
logs/
27+
*.log
28+
npm-debug.log*
29+
yarn-debug.log*
30+
yarn-error.log*
31+
32+
# Testing
33+
coverage/
34+
.nyc_output/
35+
36+
# Misc
37+
.cache/
38+
.temp/
39+
.tmp/
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Kernel TypeScript Sample App - Computer Use
2+
3+
This is a simple Kernel application that implements a prompt loop using Anthropic Computer Use.
4+
5+
It generally follows the [Anthropic Reference Implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) but replaces `xdotool` and `gnome-screenshot` with Playwright.
6+
7+
See the [docs](https://docs.onkernel.com/quickstart) for more information.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { Kernel, type KernelContext } from '@onkernel/sdk';
2+
import { samplingLoop } from './loop';
3+
import { chromium } from 'playwright';
4+
5+
const kernel = new Kernel();
6+
7+
const app = kernel.app('ts-cu');
8+
9+
interface QueryInput {
10+
query: string;
11+
}
12+
13+
interface QueryOutput {
14+
result: string;
15+
}
16+
17+
// LLM API Keys are set in the environment during `kernel deploy <filename> -e ANTHROPIC_API_KEY=XXX`
18+
// See https://docs.onkernel.com/launch/deploy#environment-variables
19+
const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
20+
21+
if (!ANTHROPIC_API_KEY) {
22+
throw new Error('ANTHROPIC_API_KEY is not set');
23+
}
24+
25+
app.action<QueryInput, QueryOutput>(
26+
'cu-task',
27+
async (ctx: KernelContext, payload?: QueryInput): Promise<QueryOutput> => {
28+
if (!payload?.query) {
29+
throw new Error('Query is required');
30+
}
31+
32+
const kernelBrowser = await kernel.browsers.create({
33+
invocation_id: ctx.invocation_id,
34+
});
35+
36+
console.log("Kernel browser live view url: ", kernelBrowser.browser_live_view_url);
37+
38+
const browser = await chromium.connectOverCDP(kernelBrowser.cdp_ws_url);
39+
const context = await browser.contexts()[0];
40+
const page = await context?.pages()[0];
41+
if (!page) {
42+
throw new Error('Error getting initial page');
43+
}
44+
45+
try {
46+
// Run the sampling loop
47+
const finalMessages = await samplingLoop({
48+
model: 'claude-sonnet-4-20250514',
49+
messages: [{
50+
role: 'user',
51+
content: payload.query,
52+
}],
53+
apiKey: ANTHROPIC_API_KEY,
54+
thinkingBudget: 1024,
55+
playwrightPage: page,
56+
});
57+
58+
// Extract the final result from the messages
59+
if (finalMessages.length === 0) {
60+
throw new Error('No messages were generated during the sampling loop');
61+
}
62+
63+
const lastMessage = finalMessages[finalMessages.length - 1];
64+
if (!lastMessage) {
65+
throw new Error('Failed to get the last message from the sampling loop');
66+
}
67+
68+
const result = typeof lastMessage.content === 'string'
69+
? lastMessage.content
70+
: lastMessage.content.map(block =>
71+
block.type === 'text' ? block.text : ''
72+
).join('');
73+
74+
return { result };
75+
} catch (error) {
76+
console.error('Error in sampling loop:', error);
77+
throw error;
78+
} finally {
79+
await browser.close();
80+
}
81+
},
82+
);

0 commit comments

Comments
 (0)