Skip to content

Commit e68bf6d

Browse files
author
raidendotai
committed
added python-cua + updated readme & cli
1 parent b0504ef commit e68bf6d

File tree

19 files changed

+710
-9
lines changed

19 files changed

+710
-9
lines changed

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ create-kernel-app [app-name] [options]
4747
- `stagehand`: Template with Stagehand SDK (TypeScript only)
4848
- `advanced-sample`: Implements sample apps using advanced Kernel configs
4949
- `computer-use`: Implements a prompt loop using Anthropic Computer Use
50-
- `cua`: Implements a Computer Use Agent (OpenAI CUA) sample (Typescript only)
50+
- `cua`: Implements a Computer Use Agent (OpenAI CUA) sample
5151

5252
### Examples
5353

@@ -124,7 +124,10 @@ kernel invoke python-basic get-page-title --payload '{"url": "https://www.google
124124
kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
125125

126126
# Typescript + CUA Sample
127-
kernel invoke ts-cua cua-task --payload '{"task": "open hackernews and get the top 5 articles"}'
127+
kernel invoke ts-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
128+
129+
# Python + CUA Sample
130+
kernel invoke python-cua cua-task --payload '{"task": "Get current market price range for an unboxed Dreamcast"}'
128131
```
129132

130133
## Sample apps reference

index.ts

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ const TEMPLATE_BROWSER_USE = "browser-use";
3535
const TEMPLATE_STAGEHAND = "stagehand";
3636
const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
3737
const TEMPLATE_COMPUTER_USE = "computer-use";
38-
const TEMPLATE_CUA_SAMPLE = "cua";
38+
const TEMPLATE_CUA = "cua";
3939
const LANGUAGE_SHORTHAND_TS = "ts";
4040
const LANGUAGE_SHORTHAND_PY = "py";
4141

@@ -75,10 +75,10 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
7575
description: "Implements the Anthropic Computer Use SDK",
7676
languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
7777
},
78-
[TEMPLATE_CUA_SAMPLE]: {
78+
[TEMPLATE_CUA]: {
7979
name: "CUA Sample",
8080
description: "Implements a Computer Use Agent (OpenAI CUA) sample",
81-
languages: [LANGUAGE_TYPESCRIPT],
81+
languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
8282
},
8383
};
8484

@@ -95,8 +95,8 @@ const INVOKE_SAMPLES: Record<
9595
'kernel invoke ts-advanced test-captcha-solver',
9696
[TEMPLATE_COMPUTER_USE]:
9797
'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
98-
[TEMPLATE_CUA_SAMPLE]:
99-
'kernel invoke ts-cua cua-task --payload \'{"query": "open hackernews and get the top 5 articles"}\'',
98+
[TEMPLATE_CUA]:
99+
'kernel invoke ts-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
100100
},
101101
[LANGUAGE_PYTHON]: {
102102
[TEMPLATE_SAMPLE_APP]:
@@ -107,6 +107,8 @@ const INVOKE_SAMPLES: Record<
107107
'kernel invoke python-advanced test-captcha-solver',
108108
[TEMPLATE_COMPUTER_USE]:
109109
'kernel invoke python-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
110+
[TEMPLATE_CUA]:
111+
'kernel invoke python-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
110112
},
111113
};
112114

@@ -123,7 +125,7 @@ const REGISTERED_APP_NAMES: Record<
123125
'ts-advanced',
124126
[TEMPLATE_COMPUTER_USE]:
125127
'ts-cu',
126-
[TEMPLATE_CUA_SAMPLE]:
128+
[TEMPLATE_CUA]:
127129
'ts-cua',
128130
},
129131
[LANGUAGE_PYTHON]: {
@@ -135,6 +137,8 @@ const REGISTERED_APP_NAMES: Record<
135137
'python-advanced',
136138
[TEMPLATE_COMPUTER_USE]:
137139
'python-cu',
140+
[TEMPLATE_CUA]:
141+
'python-cua',
138142
},
139143
};
140144

@@ -365,14 +369,16 @@ function printNextSteps(
365369
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
366370
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
367371
? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
368-
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA_SAMPLE
372+
: language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
369373
? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
370374
: language === LANGUAGE_PYTHON && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_ADVANCED_SAMPLE)
371375
? "kernel deploy main.py"
372376
: language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE
373377
? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
374378
: language === LANGUAGE_PYTHON && template === TEMPLATE_COMPUTER_USE
375379
? "kernel deploy main.py --env ANTHROPIC_API_KEY=XXX"
380+
: language === LANGUAGE_PYTHON && template === TEMPLATE_CUA
381+
? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
376382
: "";
377383

378384
console.log(

templates/python/cua/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Kernel Python Sample App - CUA
2+
3+
This is a Kernel application that demonstrates using the Computer Using Agent (CUA) from OpenAI.
4+
5+
It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
6+
7+
See the [docs](https://docs.onkernel.com/quickstart) for more information.

templates/python/cua/__init__.py

Whitespace-only changes.

templates/python/cua/_gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
__pycache__/
2+
.env
3+
.venv/
4+
env/
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .agent import Agent
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
from computers import Computer
2+
from utils import (
3+
create_response,
4+
show_image,
5+
pp,
6+
sanitize_message,
7+
check_blocklisted_url,
8+
)
9+
import json
10+
from typing import Callable
11+
12+
13+
class Agent:
    """
    A sample agent that drives a `Computer` from model output.

    Each turn sends the conversation to the model through `create_response`,
    executes every `function_call` / `computer_call` item the model returns
    against `self.computer`, and feeds the results back until the model
    emits an assistant message.
    """

    def __init__(
        self,
        model="computer-use-preview",
        computer: "Computer | None" = None,
        tools: "list[dict] | None" = None,
        acknowledge_safety_check_callback: Callable = lambda message: False,
    ):
        """
        Args:
            model: Model name forwarded to `create_response`.
            computer: Object implementing the `Computer` protocol. When given,
                the computer tool plus the `back` / `goto` / `forward`
                function tools are registered automatically.
            tools: Extra tool definitions. The list is copied, never mutated.
            acknowledge_safety_check_callback: Called with the message of each
                pending safety check; must return a truthy value to
                acknowledge it. The default rejects every check.
        """
        self.model = model
        self.computer = computer
        # Copy the list: extending the argument in place would leak tool
        # definitions into the caller's list (or into a shared default).
        self.tools = list(tools) if tools else []
        self.print_steps = True
        self.debug = False
        self.show_images = False
        self.acknowledge_safety_check_callback = acknowledge_safety_check_callback

        if computer:
            dimensions = computer.get_dimensions()
            self.tools += [
                {
                    "type": "computer-preview",
                    "display_width": dimensions[0],
                    "display_height": dimensions[1],
                    "environment": computer.get_environment(),
                },
                {
                    "type": "function",
                    "name": "back",
                    "description": "Go back to the previous page.",
                    "parameters": {},
                },
                {
                    "type": "function",
                    "name": "goto",
                    "description": "Go to a specific URL.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "url": {
                                "type": "string",
                                "description": "Fully qualified URL to navigate to.",
                            },
                        },
                        "additionalProperties": False,
                        "required": ["url"],
                    },
                },
                {
                    "type": "function",
                    "name": "forward",
                    "description": "Go forward to the next page.",
                    "parameters": {},
                },
            ]

    def debug_print(self, *args):
        """Pretty-print `args` with `pp`, but only when debugging is enabled."""
        if self.debug:
            pp(*args)

    def handle_item(self, item):
        """
        Handle one model output item; may cause a computer action + screenshot.

        Args:
            item: A dict with a "type" key of "message", "function_call" or
                "computer_call" (other types are ignored).

        Returns:
            A list of follow-up items to append to the conversation: a
            `function_call_output`, a `computer_call_output`, or nothing.

        Raises:
            ValueError: If a pending safety check is not acknowledged by
                `acknowledge_safety_check_callback`.
        """
        item_type = item["type"]

        if item_type == "message":
            if self.print_steps:
                print(item["content"][0]["text"])

        if item_type == "function_call":
            name, args = item["name"], json.loads(item["arguments"])
            if self.print_steps:
                print(f"{name}({args})")

            if hasattr(self.computer, name):  # if function exists on computer, call it
                method = getattr(self.computer, name)
                method(**args)
            return [
                {
                    "type": "function_call_output",
                    "call_id": item["call_id"],
                    "output": "success",  # hard-coded output for demo
                }
            ]

        if item_type == "computer_call":
            action = item["action"]
            action_type = action["type"]
            action_args = {k: v for k, v in action.items() if k != "type"}
            if self.print_steps:
                print(f"{action_type}({action_args})")

            # Every pending safety check must be acknowledged BEFORE the
            # action runs; otherwise a rejected check would still let the
            # flagged action execute.
            pending_checks = item.get("pending_safety_checks") or []
            for check in pending_checks:
                message = check["message"]
                if not self.acknowledge_safety_check_callback(message):
                    raise ValueError(
                        f"Safety check failed: {message}. Cannot continue with unacknowledged safety checks."
                    )

            method = getattr(self.computer, action_type)
            method(**action_args)

            screenshot_base64 = self.computer.screenshot()
            if self.show_images:
                show_image(screenshot_base64)

            call_output = {
                "type": "computer_call_output",
                "call_id": item["call_id"],
                "acknowledged_safety_checks": pending_checks,
                "output": {
                    "type": "input_image",
                    "image_url": f"data:image/png;base64,{screenshot_base64}",
                },
            }

            # additional URL safety checks for browser environments
            if self.computer.get_environment() == "browser":
                current_url = self.computer.get_current_url()
                check_blocklisted_url(current_url)
                call_output["output"]["current_url"] = current_url

            return [call_output]
        return []

    def run_full_turn(
        self, input_items, print_steps=True, debug=False, show_images=False
    ):
        """
        Query the model repeatedly until it answers with an assistant message.

        Args:
            input_items: Conversation items to send to the model.
            print_steps: Print messages and actions as they are handled.
            debug: Pretty-print requests and responses.
            show_images: Display each screenshot via `show_image`.

        Returns:
            The list of items produced during this turn (model output plus the
            outputs of executed calls), excluding `input_items`.

        Raises:
            ValueError: If a model response carries no "output" key, or if a
                safety check is rejected while handling an item.
        """
        self.print_steps = print_steps
        self.debug = debug
        self.show_images = show_images
        new_items = []

        # keep looping until we get a final response
        while not new_items or new_items[-1].get("role") != "assistant":
            self.debug_print([sanitize_message(msg) for msg in input_items + new_items])

            response = create_response(
                model=self.model,
                input=input_items + new_items,
                tools=self.tools,
                truncation="auto",
            )
            self.debug_print(response)

            # A response without output is always an error, not only when
            # debugging; dumping the raw response is the debug-only part.
            if "output" not in response:
                if self.debug:
                    print(response)
                raise ValueError("No output from model")

            new_items += response["output"]
            for item in response["output"]:
                new_items += self.handle_item(item)

        return new_items
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from . import default
2+
from . import contrib
3+
from .computer import Computer
4+
from .config import computers_config
5+
6+
__all__ = [
7+
"default",
8+
"contrib",
9+
"Computer",
10+
"computers_config",
11+
]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from typing import Protocol, List, Literal, Dict
2+
3+
4+
class Computer(Protocol):
    """Defines the 'shape' (methods/properties) our loop expects."""

    # Name of the environment being controlled.
    def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: ...

    # Display size as a two-int tuple (presumably width, height - confirm
    # against implementations).
    def get_dimensions(self) -> tuple[int, int]: ...

    # Screenshot of the current display, returned as a string.
    def screenshot(self) -> str: ...

    def click(self, x: int, y: int, button: str = "left") -> None: ...

    def double_click(self, x: int, y: int) -> None: ...

    def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: ...

    def type(self, text: str) -> None: ...

    def wait(self, ms: int = 1000) -> None: ...

    def move(self, x: int, y: int) -> None: ...

    def keypress(self, keys: List[str]) -> None: ...

    def drag(self, path: List[Dict[str, int]]) -> None: ...

    # Instance method like every other member: it needs `self` so that
    # implementations called as `computer.get_current_url()` match the protocol.
    def get_current_url(self) -> str: ...
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from .default import *
2+
from .contrib import *
3+
4+
# Registry that maps a computer name to the object implementing it
# (presumably classes exported by the .default / .contrib star imports).
computers_config = dict(
    [
        ("local-playwright", LocalPlaywrightBrowser),
        ("kernel", KernelPlaywrightBrowser),
    ]
)

0 commit comments

Comments
 (0)