kernel
diff --git a/.cursor/commands/qa.md b/.cursor/commands/qa.md
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/create/templates.go b/pkg/create/templates.go
Lines changed: 2 additions & 2 deletions
diff --git a/pkg/templates/python/anthropic-computer-use/README.md b/pkg/templates/python/anthropic-computer-use/README.md
Lines changed: 43 additions & 3 deletions
diff --git a/pkg/templates/python/anthropic-computer-use/loop.py b/pkg/templates/python/anthropic-computer-use/loop.py
Lines changed: 33 additions & 30 deletions
diff --git a/pkg/templates/python/anthropic-computer-use/main.py b/pkg/templates/python/anthropic-computer-use/main.py
Lines changed: 61 additions & 66 deletions
diff --git a/pkg/templates/python/anthropic-computer-use/pyproject.toml b/pkg/templates/python/anthropic-computer-use/pyproject.toml
Lines changed: 1 addition & 3 deletions
@@ -230,7 +230,7 @@ Once all deployments are complete, present the human with these invoke commands
 kernel invoke ts-basic get-page-title --payload '{"url": "https://www.google.com"}'
 kernel invoke ts-captcha-solver test-captcha-solver
 kernel invoke ts-stagehand teamsize-task --payload '{"company": "Kernel"}'
-kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'
+kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
 kernel invoke ts-magnitude mag-url-extract --payload '{"url": "https://en.wikipedia.org/wiki/Special:Random"}'
 kernel invoke ts-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
 kernel invoke ts-gemini-cua gemini-cua-task --payload '{"startingUrl": "https://www.magnitasks.com/", "instruction": "Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved."}'
@@ -240,7 +240,7 @@ kernel invoke ts-claude-agent-sdk agent-task --payload '{"task": "Go to https://
 kernel invoke python-basic get-page-title --payload '{"url": "https://www.google.com"}'
 kernel invoke python-captcha-solver test-captcha-solver
 kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
-kernel invoke python-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'
+kernel invoke python-anthropic-cua cua-task --payload '{"query": "Go to http://magnitasks.com, Click the Tasks option in the left-side bar, and move the 5 items in the To Do and In Progress items to the Done section of the Kanban board. You are done successfully when the items are moved.", "record_replay": true}'
 kernel invoke python-openai-cua cua-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 5 articles"}'
 kernel invoke python-openagi-cua openagi-default-task -p '{"instruction": "Navigate to https://agiopen.org and click the What is Computer Use? button"}'
 kernel invoke py-claude-agent-sdk agent-task --payload '{"task": "Go to https://news.ycombinator.com and get the top 3 stories"}'
 
@@ -178,7 +178,7 @@ var Commands = map[string]map[string]DeployConfig{
 		TemplateAnthropicComputerUse: {
 			EntryPoint:    "index.ts",
 			NeedsEnvFile:  true,
-			InvokeCommand: `kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
+			InvokeCommand: `kernel invoke ts-anthropic-cua cua-task --payload '{"query": "Navigate to http://magnitasks.com and click on Tasks in the sidebar"}'`,
 		},
 		TemplateMagnitude: {
 			EntryPoint:    "index.ts",
@@ -220,7 +220,7 @@ var Commands = map[string]map[string]DeployConfig{
 		TemplateAnthropicComputerUse: {
 			EntryPoint:    "main.py",
 			NeedsEnvFile:  true,
-			InvokeCommand: `kernel invoke python-anthropic-cua cua-task --payload '{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}'`,
+			InvokeCommand: `kernel invoke python-anthropic-cua cua-task --payload '{"query": "Navigate to http://magnitasks.com and click on Tasks in the sidebar"}'`,
 		},
 		TemplateOpenAIComputerUse: {
 			EntryPoint:    "main.py",
 
@@ -1,7 +1,47 @@
 # Kernel Python Sample App - Anthropic Computer Use
 
-This is a simple Kernel application that implements a prompt loop using Anthropic Computer Use.
+This is a Kernel application that implements a prompt loop using Anthropic Computer Use with Kernel's Computer Controls API.
 
-It generally follows the [Anthropic Reference Implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) but replaces `xodotool` and `gnome-screenshot` with Playwright.
+It generally follows the [Anthropic Reference Implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) but uses Kernel's Computer Controls API instead of `xdotool` and `gnome-screenshot`.
 
-See the [docs](https://www.kernel.sh/docs/quickstart) for information.
+## Setup
+
+1. Get your API keys:
+   - **Kernel**: [dashboard.onkernel.com](https://dashboard.onkernel.com)
+   - **Anthropic**: [console.anthropic.com](https://console.anthropic.com)
+
+2. Deploy the app:
+```bash
+kernel login
+cp .env.example .env  # then edit .env and set ANTHROPIC_API_KEY
+kernel deploy main.py --env-file .env
+```
+
+## Usage
+
+```bash
+kernel invoke python-anthropic-cua cua-task --payload '{"query": "Navigate to https://example.com and describe the page"}'
+```
+
+## Recording Replays
+
+> **Note:** Replay recording is only available to Kernel users on paid plans.
+
+Add `"record_replay": true` to your payload to capture a video of the browser session:
+
+```bash
+kernel invoke python-anthropic-cua cua-task --payload '{"query": "Navigate to https://example.com", "record_replay": true}'
+```
+
+When enabled, the response will include a `replay_url` field with a link to view the recorded session.
+
+## Known Limitations
+
+### Cursor Position
+
+The `cursor_position` action is not supported with Kernel's Computer Controls API. If the model attempts to use this action, an error will be returned. This is a known limitation that does not significantly impact most computer use workflows, as the model typically tracks cursor position through screenshots.
+
+## Resources
+
+- [Anthropic Computer Use Documentation](https://docs.anthropic.com/en/docs/build-with-claude/computer-use)
+- [Kernel Documentation](https://www.kernel.sh/docs/quickstart)
@@ -1,25 +1,16 @@
 """
 Agentic sampling loop that calls the Anthropic API and the local implementation of Anthropic-defined computer use tools.
 From https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/computer_use_demo/loop.py
+Modified to use Kernel Computer Controls API instead of Playwright.
 """
 
 import os
-import platform
-from collections.abc import Callable
 from datetime import datetime
 from enum import StrEnum
 from typing import Any, cast
-from playwright.async_api import Page
-
-import httpx
-from anthropic import (
-    Anthropic,
-    AnthropicBedrock,
-    AnthropicVertex,
-    APIError,
-    APIResponseValidationError,
-    APIStatusError,
-)
+
+from kernel import Kernel
+from anthropic import Anthropic
 from anthropic.types.beta import (
     BetaCacheControlEphemeralParam,
     BetaContentBlockParam,
@@ -78,14 +69,15 @@ async def sampling_loop(
     model: str,
     messages: list[BetaMessageParam],
     api_key: str,
+    kernel: Kernel,
+    session_id: str,
     provider: APIProvider = APIProvider.ANTHROPIC,
     system_prompt_suffix: str = "",
     only_n_most_recent_images: int | None = None,
     max_tokens: int = 4096,
     tool_version: ToolVersion = "computer_use_20250124",
     thinking_budget: int | None = None,
     token_efficient_tools_beta: bool = False,
-    playwright_page: Page,
 ):
     """
     Agentic sampling loop for the assistant/tool interaction of computer use.
@@ -94,19 +86,20 @@ async def sampling_loop(
         model: The model to use for the API call
         messages: The conversation history
         api_key: The API key for authentication
+        kernel: The Kernel client instance
+        session_id: The Kernel browser session ID
         provider: The API provider (defaults to ANTHROPIC)
         system_prompt_suffix: Additional system prompt text (defaults to empty string)
         only_n_most_recent_images: Optional limit on number of recent images to keep
         max_tokens: Maximum tokens for the response (defaults to 4096)
         tool_version: Version of tools to use (defaults to "computer_use_20250124")
         thinking_budget: Optional token budget for thinking
         token_efficient_tools_beta: Whether to use token efficient tools beta
-        playwright_page: The Playwright page instance for browser automation
     """
     tool_group = TOOL_GROUPS_BY_VERSION[tool_version]
     tool_collection = ToolCollection(
         *(
-            ToolCls(page=playwright_page if ToolCls.__name__.startswith("ComputerTool") else None)
+            ToolCls(kernel=kernel, session_id=session_id) if ToolCls.__name__.startswith("ComputerTool") else ToolCls()
             for ToolCls in tool_group.tools
         )
     )
@@ -252,21 +245,31 @@ def _response_to_params(
 ) -> list[BetaContentBlockParam]:
     res: list[BetaContentBlockParam] = []
     for block in response.content:
-        if isinstance(block, BetaTextBlock):
-            if block.text:
+        block_type = getattr(block, "type", None)
+        
+        if block_type == "thinking":
+            thinking_block = {
+                "type": "thinking",
+                "thinking": getattr(block, "thinking", None),
+            }
+            if hasattr(block, "signature"):
+                thinking_block["signature"] = getattr(block, "signature", None)
+            res.append(cast(BetaContentBlockParam, thinking_block))
+        elif block_type == "text" or isinstance(block, BetaTextBlock):
+            if getattr(block, "text", None):
                 res.append(BetaTextBlockParam(type="text", text=block.text))
-            elif getattr(block, "type", None) == "thinking":
-                # Handle thinking blocks - include signature field
-                thinking_block = {
-                    "type": "thinking",
-                    "thinking": getattr(block, "thinking", None),
-                }
-                if hasattr(block, "signature"):
-                    thinking_block["signature"] = getattr(block, "signature", None)
-                res.append(cast(BetaContentBlockParam, thinking_block))
+        elif block_type == "tool_use":
+            tool_use_block: BetaToolUseBlockParam = {
+                "type": "tool_use",
+                "id": block.id,
+                "name": block.name,
+                "input": block.input,
+            }
+            res.append(tool_use_block)
         else:
-            # Handle tool use blocks normally
-            res.append(cast(BetaToolUseBlockParam, block.model_dump()))
+            # Preserve unexpected block types via model_dump(); blocks without model_dump are skipped
+            if hasattr(block, "model_dump"):
+                res.append(cast(BetaContentBlockParam, block.model_dump()))
     return res
 
 
@@ -334,4 +337,4 @@ def _make_api_tool_result(
 def _maybe_prepend_system_tool_result(result: ToolResult, result_text: str):
     if result.system:
         result_text = f"<system>{result.system}</system>\n{result_text}"
-    return result_text
+    return result_text
@@ -1,97 +1,92 @@
 import os
-from typing import Dict, TypedDict
+from typing import Dict, Optional, TypedDict
 
 import kernel
-from kernel import Kernel
 from loop import sampling_loop
-from playwright.async_api import async_playwright
+from session import KernelBrowserSession
 
 
 class QueryInput(TypedDict):
     query: str
+    record_replay: Optional[bool]
 
 
 class QueryOutput(TypedDict):
     result: str
+    replay_url: Optional[str]
 
 
 api_key = os.getenv("ANTHROPIC_API_KEY")
 if not api_key:
     raise ValueError("ANTHROPIC_API_KEY is not set")
 
-client = Kernel()
 app = kernel.App("python-anthropic-cua")
 
+
 @app.action("cua-task")
 async def cua_task(
     ctx: kernel.KernelContext,
     payload: QueryInput,
 ) -> QueryOutput:
-    # A function that processes a user query using a browser-based sampling loop
-
-    # Args:
-    #     ctx: Kernel context containing invocation information
-    #     payload: An object containing a query string to process
-
-    # Returns:
-    #     A dictionary containing the result of the sampling loop as a string
+    """
+    Process a user query using Anthropic Computer Use with Kernel's browser automation.
+
+    Args:
+        ctx: Kernel context containing invocation information
+        payload: An object containing:
+            - query: The task/query string to process
+            - record_replay: Optional boolean to enable video replay recording
+
+    Returns:
+        A dictionary containing:
+            - result: The result of the sampling loop as a string
+            - replay_url: URL to view the replay (if recording was enabled)
+    """
     if not payload or not payload.get("query"):
         raise ValueError("Query is required")
 
-    kernel_browser = client.browsers.create(
-        invocation_id=ctx.invocation_id, stealth=True
-    )
-    print("Kernel browser live view url: ", kernel_browser.browser_live_view_url)
-
-    try:
-        async with async_playwright() as playwright:
-            browser = await playwright.chromium.connect_over_cdp(
-                kernel_browser.cdp_ws_url
-            )
-            context = (
-                browser.contexts[0] if browser.contexts else await browser.new_context()
+    record_replay = payload.get("record_replay", False)
+
+    async with KernelBrowserSession(
+        stealth=True,
+        record_replay=record_replay,
+    ) as session:
+        print("Kernel browser live view url:", session.live_view_url)
+
+        final_messages = await sampling_loop(
+            model="claude-sonnet-4-5-20250929",
+            messages=[
+                {
+                    "role": "user",
+                    "content": payload["query"],
+                }
+            ],
+            api_key=str(api_key),
+            thinking_budget=1024,
+            kernel=session.kernel,
+            session_id=session.session_id,
+        )
+
+        if not final_messages:
+            raise ValueError("No messages were generated during the sampling loop")
+
+        last_message = final_messages[-1]
+        if not last_message:
+            raise ValueError(
+                "Failed to get the last message from the sampling loop"
             )
-            page = context.pages[0] if context.pages else await context.new_page()
-
-            # Run the sampling loop
-            final_messages = await sampling_loop(
-                model="claude-sonnet-4-20250514",
-                messages=[
-                    {
-                        "role": "user",
-                        "content": payload["query"],
-                    }
-                ],
-                api_key=str(api_key),
-                thinking_budget=1024,
-                playwright_page=page,
+
+        result = ""
+        if isinstance(last_message.get("content"), str):
+            result = last_message["content"]  # type: ignore[assignment]
+        else:
+            result = "".join(
+                block["text"]
+                for block in last_message["content"]  # type: ignore[index]
+                if isinstance(block, Dict) and block.get("type") == "text"
             )
 
-            # Extract the final result
-            if not final_messages:
-                raise ValueError("No messages were generated during the sampling loop")
-
-            last_message = final_messages[-1]
-            if not last_message:
-                raise ValueError(
-                    "Failed to get the last message from the sampling loop"
-                )
-
-            result = ""
-            if isinstance(last_message.get("content"), str):
-                result = last_message["content"]  # type: ignore[assignment]
-            else:
-                result = "".join(
-                    block["text"]
-                    for block in last_message["content"]  # type: ignore[index]
-                    if isinstance(block, Dict) and block.get("type") == "text"
-                )
-
-            return {"result": result}
-    except Exception as exc:
-        print(f"Error in sampling loop: {exc}")
-        raise
-    finally:
-        if browser is not None:
-            await browser.close()
-        client.browsers.delete_by_id(kernel_browser.session_id)
+    return {
+        "result": result,
+        "replay_url": session.replay_view_url,
+    }
@@ -5,11 +5,9 @@ description = "Kernel reference app for Anthropic Computer Use"
 requires-python = ">=3.9"
 dependencies = [
     "anthropic>=0.75.0",
-    "playwright>=1.56.0",
     "python-dateutil>=2.9.0",
     "pydantic>=2.12.5",
     "typing-extensions>=4.15.0",
-    "kernel>=0.23.0",
+    "kernel>=0.24.0",
     "python-dotenv>=1.2.1",
-    "httpx>=0.28.1",
 ]