raiden-staging
diff --git a/‎README.md‎
Lines changed: 5 additions & 2 deletions b/‎README.md‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎index.ts‎
Lines changed: 13 additions & 7 deletions b/‎index.ts‎
Lines changed: 13 additions & 7 deletions
diff --git a/‎templates/python/cua/README.md‎
Lines changed: 7 additions & 0 deletions b/‎templates/python/cua/README.md‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎templates/python/cua/__init__.py‎ b/‎templates/python/cua/__init__.py‎
diff --git a/‎templates/python/cua/_gitignore‎
Lines changed: 4 additions & 0 deletions b/‎templates/python/cua/_gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎templates/python/cua/agent/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎templates/python/cua/agent/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎templates/python/cua/agent/agent.py‎
Lines changed: 170 additions & 0 deletions b/‎templates/python/cua/agent/agent.py‎
Lines changed: 170 additions & 0 deletions
diff --git a/‎templates/python/cua/computers/__init__.py‎
Lines changed: 11 additions & 0 deletions b/‎templates/python/cua/computers/__init__.py‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎templates/python/cua/computers/computer.py‎
Lines changed: 29 additions & 0 deletions b/‎templates/python/cua/computers/computer.py‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎templates/python/cua/computers/config.py‎
Lines changed: 7 additions & 0 deletions b/‎templates/python/cua/computers/config.py‎
Lines changed: 7 additions & 0 deletions
@@ -47,7 +47,7 @@ create-kernel-app [app-name] [options]
   - `stagehand`: Template with Stagehand SDK (Typescript only)
   - `advanced-sample`: Implements sample apps using advanced Kernel configs
   - `computer-use`: Implements a prompt loop using Anthropic Computer Use
-  - `cua`: Implements a Computer Use Agent (OpenAI CUA) sample (Typescript only)
+  - `cua`: Implements a Computer Use Agent (OpenAI CUA) sample
 
 ### Examples
 
@@ -124,7 +124,10 @@ kernel invoke python-basic get-page-title --payload '{"url": "https://www.google
 kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
 
 # Typescript + CUA Sample
-kernel invoke ts-cua cua-task --payload '{"task": "open hackernews and get the top 5 articles"}'
+kernel invoke ts-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
+
+# Python + CUA Sample
+kernel invoke python-cua cua-task --payload '{"task": "Get current market price range for an unboxed Dreamcast"}'
 ```
 
 ## Sample apps reference
 
@@ -35,7 +35,7 @@ const TEMPLATE_BROWSER_USE = "browser-use";
 const TEMPLATE_STAGEHAND = "stagehand";
 const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
 const TEMPLATE_COMPUTER_USE = "computer-use";
-const TEMPLATE_CUA_SAMPLE = "cua";
+const TEMPLATE_CUA = "cua";
 const LANGUAGE_SHORTHAND_TS = "ts";
 const LANGUAGE_SHORTHAND_PY = "py";
 
@@ -75,10 +75,10 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
     description: "Implements the Anthropic Computer Use SDK",
     languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
   },
-  [TEMPLATE_CUA_SAMPLE]: {
+  [TEMPLATE_CUA]: {
     name: "CUA Sample",
     description: "Implements a Computer Use Agent (OpenAI CUA) sample",
-    languages: [LANGUAGE_TYPESCRIPT],
+    languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
   },
 };
 
@@ -95,8 +95,8 @@ const INVOKE_SAMPLES: Record<
       'kernel invoke ts-advanced test-captcha-solver',
     [TEMPLATE_COMPUTER_USE]:
       'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
-    [TEMPLATE_CUA_SAMPLE]:
-      'kernel invoke ts-cua cua-task --payload \'{"query": "open hackernews and get the top 5 articles"}\'',
+    [TEMPLATE_CUA]:
+      'kernel invoke ts-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
   },
   [LANGUAGE_PYTHON]: {
     [TEMPLATE_SAMPLE_APP]:
@@ -107,6 +107,8 @@ const INVOKE_SAMPLES: Record<
       'kernel invoke python-advanced test-captcha-solver',
     [TEMPLATE_COMPUTER_USE]:
       'kernel invoke python-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
+    [TEMPLATE_CUA]:
+      'kernel invoke python-cua cua-task --payload \'{"query": "Go to https://news.ycombinator.com and get the top 5 articles"}\'',
   },
 };
 
@@ -123,7 +125,7 @@ const REGISTERED_APP_NAMES: Record<
       'ts-advanced',
     [TEMPLATE_COMPUTER_USE]:
       'ts-cu',
-    [TEMPLATE_CUA_SAMPLE]:
+    [TEMPLATE_CUA]:
       'ts-cua',
   },
   [LANGUAGE_PYTHON]: {
@@ -135,6 +137,8 @@ const REGISTERED_APP_NAMES: Record<
       'python-advanced',
     [TEMPLATE_COMPUTER_USE]:
       'python-cu',
+    [TEMPLATE_CUA]:
+      'python-cua',
   },
 };
 
@@ -365,14 +369,16 @@ function printNextSteps(
       ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
       ? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
-      : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA_SAMPLE
+      : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA
       ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_PYTHON && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_ADVANCED_SAMPLE)
       ? "kernel deploy main.py"
       : language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE
       ? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_PYTHON && template === TEMPLATE_COMPUTER_USE
       ? "kernel deploy main.py --env ANTHROPIC_API_KEY=XXX"
+      : language === LANGUAGE_PYTHON && template === TEMPLATE_CUA
+      ? "kernel deploy main.py --env OPENAI_API_KEY=XXX"
       : "";
 
   console.log(
 
@@ -0,0 +1,7 @@
+# Kernel Python Sample App - CUA
+
+This is a Kernel application that demonstrates how to use OpenAI's Computer-Using Agent (CUA).
+
+It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
+
+See the [docs](https://docs.onkernel.com/quickstart) for more information.
@@ -0,0 +1,4 @@
+__pycache__/
+.env
+.venv/
+env/
@@ -0,0 +1 @@
+from .agent import Agent
@@ -0,0 +1,170 @@
+from computers import Computer
+from utils import (
+    create_response,
+    show_image,
+    pp,
+    sanitize_message,
+    check_blocklisted_url,
+)
+import json
+from typing import Callable
+
+
+class Agent:
+    """
+    A sample agent class that can be used to interact with a computer.
+
+    Each turn sends the conversation to the model through `create_response`
+    and then handles every returned item: messages are printed, function
+    calls are dispatched to the same-named method on the computer (when one
+    exists), and computer calls are executed and answered with a screenshot.
+
+    (See simple_cua_loop.py for a simple example without an agent.)
+    """
+
+    def __init__(
+        self,
+        model="computer-use-preview",
+        computer: Computer = None,
+        tools: list[dict] = [],
+        acknowledge_safety_check_callback: Callable = lambda message: False,
+    ):
+        """
+        Args:
+            model: Model name forwarded to `create_response`.
+            computer: Optional computer to control. When given, the computer
+                tool plus the `back`, `goto` and `forward` function tools are
+                appended to this agent's tool list.
+            tools: Extra tool definitions to send to the model. The list is
+                copied; the caller's list is never modified.
+            acknowledge_safety_check_callback: Called with the message of each
+                pending safety check; a falsy return aborts the turn. The
+                default rejects every check.
+        """
+        self.model = model
+        self.computer = computer
+        # Copy rather than alias: extending the shared default list (or the
+        # caller's list) in place would leak tool definitions from one Agent
+        # into the next.
+        self.tools = list(tools or [])
+        self.print_steps = True
+        self.debug = False
+        self.show_images = False
+        self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
+
+        if computer:
+            dimensions = computer.get_dimensions()
+            self.tools += [
+                {
+                    "type": "computer-preview",
+                    "display_width": dimensions[0],
+                    "display_height": dimensions[1],
+                    "environment": computer.get_environment(),
+                },
+                {
+                    "type": "function",
+                    "name": "back",
+                    "description": "Go back to the previous page.",
+                    "parameters": {},
+                },
+                {
+                    "type": "function",
+                    "name": "goto",
+                    "description": "Go to a specific URL.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "url": {
+                                "type": "string",
+                                "description": "Fully qualified URL to navigate to.",
+                            },
+                        },
+                        "additionalProperties": False,
+                        "required": ["url"],
+                    },
+                },
+                {
+                    "type": "function",
+                    "name": "forward",
+                    "description": "Go forward to the next page.",
+                    "parameters": {},
+                },
+            ]
+
+    def debug_print(self, *args):
+        """Pretty-print `args` with `pp`, but only when debug mode is on."""
+        if self.debug:
+            pp(*args)
+
+    def handle_item(self, item):
+        """
+        Handle each item; may cause a computer action + screenshot.
+
+        Args:
+            item: One entry of the model response's "output" list.
+
+        Returns:
+            A list of follow-up items to append to the conversation: one
+            `function_call_output` or `computer_call_output`, or an empty
+            list for any other item type.
+
+        Raises:
+            ValueError: If a pending safety check is not acknowledged by
+                `acknowledge_safety_check_callback`.
+        """
+        if item["type"] == "message":
+            if self.print_steps:
+                print(item["content"][0]["text"])
+
+        if item["type"] == "function_call":
+            name, args = item["name"], json.loads(item["arguments"])
+            if self.print_steps:
+                print(f"{name}({args})")
+
+            if hasattr(self.computer, name):  # if function exists on computer, call it
+                method = getattr(self.computer, name)
+                method(**args)
+            # NOTE(review): "success" is reported even when the computer has no
+            # method with this name -- acceptable for a demo, confirm for real use.
+            return [
+                {
+                    "type": "function_call_output",
+                    "call_id": item["call_id"],
+                    "output": "success",  # hard-coded output for demo
+                }
+            ]
+
+        if item["type"] == "computer_call":
+            action = item["action"]
+            action_type = action["type"]
+            # Everything except the discriminating "type" key is passed to the
+            # computer method as keyword arguments.
+            action_args = {k: v for k, v in action.items() if k != "type"}
+            if self.print_steps:
+                print(f"{action_type}({action_args})")
+
+            method = getattr(self.computer, action_type)
+            method(**action_args)
+
+            screenshot_base64 = self.computer.screenshot()
+            if self.show_images:
+                show_image(screenshot_base64)
+
+            # if user doesn't ack all safety checks exit with error
+            # NOTE(review): the action above has already run by the time the
+            # checks are evaluated -- confirm this ordering is intended.
+            pending_checks = item.get("pending_safety_checks", [])
+            for check in pending_checks:
+                message = check["message"]
+                if not self.acknowledge_safety_check_callback(message):
+                    raise ValueError(
+                        f"Safety check failed: {message}. Cannot continue with unacknowledged safety checks."
+                    )
+
+            call_output = {
+                "type": "computer_call_output",
+                "call_id": item["call_id"],
+                "acknowledged_safety_checks": pending_checks,
+                "output": {
+                    "type": "input_image",
+                    "image_url": f"data:image/png;base64,{screenshot_base64}",
+                },
+            }
+
+            # additional URL safety checks for browser environments
+            if self.computer.get_environment() == "browser":
+                current_url = self.computer.get_current_url()
+                # presumably raises for a blocklisted URL -- verify in utils
+                check_blocklisted_url(current_url)
+                call_output["output"]["current_url"] = current_url
+
+            return [call_output]
+        return []
+
+    def run_full_turn(
+        self, input_items, print_steps=True, debug=False, show_images=False
+    ):
+        """
+        Call the model repeatedly until its latest item is an assistant message.
+
+        Args:
+            input_items: Conversation items sent to the model on every call.
+            print_steps: Print messages and actions as they are handled.
+            debug: Pretty-print the sanitized requests and raw responses.
+            show_images: Display each screenshot with `show_image`.
+
+        Returns:
+            The items produced during this turn (model output plus the call
+            outputs generated by `handle_item`); `input_items` is not included.
+
+        Raises:
+            ValueError: If a model response has no "output" key.
+        """
+        self.print_steps = print_steps
+        self.debug = debug
+        self.show_images = show_images
+        new_items = []
+
+        # keep looping until we get a final response
+        while not new_items or new_items[-1].get("role") != "assistant":
+            self.debug_print([sanitize_message(msg) for msg in input_items + new_items])
+
+            response = create_response(
+                model=self.model,
+                input=input_items + new_items,
+                tools=self.tools,
+                truncation="auto",
+            )
+            self.debug_print(response)
+
+            # Fail with a clear error whether or not debug is enabled; gating
+            # this on the debug flag let a bare KeyError escape below.
+            if "output" not in response:
+                if self.debug:
+                    print(response)
+                raise ValueError("No output from model")
+
+            new_items += response["output"]
+            for item in response["output"]:
+                new_items += self.handle_item(item)
+
+        return new_items
@@ -0,0 +1,11 @@
+from . import default
+from . import contrib
+from .computer import Computer
+from .config import computers_config
+
+__all__ = [
+    "default",
+    "contrib",
+    "Computer",
+    "computers_config",
+]
@@ -0,0 +1,29 @@
+from typing import Protocol, List, Literal, Dict
+
+
+class Computer(Protocol):
+    """Defines the 'shape' (methods/properties) our loop expects.
+
+    Any object that provides these methods can be driven by the agent loop;
+    the action methods are looked up by name and called with keyword
+    arguments taken from the model's action payload.
+    """
+
+    def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]: ...
+
+    def get_dimensions(self) -> tuple[int, int]: ...
+
+    def screenshot(self) -> str: ...
+
+    def click(self, x: int, y: int, button: str = "left") -> None: ...
+
+    def double_click(self, x: int, y: int) -> None: ...
+
+    def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: ...
+
+    def type(self, text: str) -> None: ...
+
+    def wait(self, ms: int = 1000) -> None: ...
+
+    def move(self, x: int, y: int) -> None: ...
+
+    def keypress(self, keys: List[str]) -> None: ...
+
+    def drag(self, path: List[Dict[str, int]]) -> None: ...
+
+    # Instance method like the others: the agent calls it on a computer
+    # instance when get_environment() reports "browser".
+    def get_current_url(self) -> str: ...
@@ -0,0 +1,7 @@
+from .default import *
+from .contrib import *
+
+# Registry mapping a computer name to the class that implements it.
+# NOTE(review): LocalPlaywrightBrowser and KernelPlaywrightBrowser are not
+# defined in this module; presumably they come from the star imports above --
+# confirm that .default / .contrib export them.
+computers_config = {
+    "local-playwright": LocalPlaywrightBrowser,
+    "kernel": KernelPlaywrightBrowser,
+}
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +__pycache__/
 +.env
 +.venv/
 +env/