kernel · juecd · Jun 22, 2025 · Jun 18, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/README.md b/README.md
@@ -47,6 +47,7 @@ create-kernel-app [app-name] [options]
   - `stagehand`: Template with Stagehand SDK (Typescript only)
   - `advanced-sample`: Implements sample apps using advanced Kernel configs
   - `computer-use`: Implements a prompt loop using Anthropic Computer Use
+  - `cua-sample`: Implements a Computer Use Agent (OpenAI CUA) sample (Typescript only)
 
 ### Examples
 
@@ -121,6 +122,9 @@ kernel invoke python-basic get-page-title --payload '{"url": "https://www.google
 
 # Python + Browser Use
 kernel invoke python-bu bu-task --payload '{"task": "Compare the price of gpt-4o and DeepSeek-V3"}'
+
+# Typescript + CUA Sample
+kernel invoke ts-cua agent-run --payload '{"query": "open hackernews and get the top 5 articles"}'
 ```
 
 ## Sample apps reference
@@ -134,6 +138,7 @@ These are the sample apps currently available when you run `npx @onkernel/create
 | **stagehand** | Returns the first result of a specified Google search | Stagehand | `{ query }` |
 | **advanced-sample** | Implements sample apps using advanced Kernel configs | n/a |
 | **computer-use** | Implements a prompt loop | Anthropic Computer Use API | `{ query }` |
+| **cua-sample** | Implements the OpenAI Computer Using Agent (CUA) | OpenAI CUA | `{ query }` |
 
 ## Documentation
 

diff --git a/index.ts b/index.ts
@@ -18,7 +18,8 @@ type TemplateKey =
   | "browser-use"
   | "stagehand"
   | "advanced-sample"
-  | "computer-use";
+  | "computer-use"
+  | "cua-sample";
 type LanguageInfo = { name: string; shorthand: string };
 type TemplateInfo = {
   name: string;
@@ -34,6 +35,7 @@ const TEMPLATE_BROWSER_USE = "browser-use";
 const TEMPLATE_STAGEHAND = "stagehand";
 const TEMPLATE_ADVANCED_SAMPLE = "advanced-sample";
 const TEMPLATE_COMPUTER_USE = "computer-use";
+const TEMPLATE_CUA_SAMPLE = "cua-sample";
 const LANGUAGE_SHORTHAND_TS = "ts";
 const LANGUAGE_SHORTHAND_PY = "py";
 
@@ -73,6 +75,11 @@ const TEMPLATES: Record<TemplateKey, TemplateInfo> = {
     description: "Implements the Anthropic Computer Use SDK",
     languages: [LANGUAGE_TYPESCRIPT, LANGUAGE_PYTHON],
   },
+  [TEMPLATE_CUA_SAMPLE]: {
+    name: "CUA Sample",
+    description: "Implements a Computer Use Agent (OpenAI CUA) sample",
+    languages: [LANGUAGE_TYPESCRIPT],
+  },
 };
 
 const INVOKE_SAMPLES: Record<
@@ -88,6 +95,8 @@ const INVOKE_SAMPLES: Record<
       'kernel invoke ts-advanced test-captcha-solver',
     [TEMPLATE_COMPUTER_USE]:
       'kernel invoke ts-cu cu-task --payload \'{"query": "Return the first url of a search result for NYC restaurant reviews Pete Wells"}\'',
+    // The action name must match the one registered by the cua-sample template ("agent-run").
+    [TEMPLATE_CUA_SAMPLE]:
+      'kernel invoke ts-cua agent-run --payload \'{"query": "open hackernews and get the top 5 articles"}\'',
   },
   [LANGUAGE_PYTHON]: {
     [TEMPLATE_SAMPLE_APP]:
@@ -114,6 +123,8 @@ const REGISTERED_APP_NAMES: Record<
       'ts-advanced',
     [TEMPLATE_COMPUTER_USE]:
       'ts-cu',
+    [TEMPLATE_CUA_SAMPLE]:
+      'ts-cua',
   },
   [LANGUAGE_PYTHON]: {
     [TEMPLATE_SAMPLE_APP]:
@@ -354,6 +365,8 @@ function printNextSteps(
       ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_COMPUTER_USE
       ? "kernel deploy index.ts --env ANTHROPIC_API_KEY=XXX"
+      : language === LANGUAGE_TYPESCRIPT && template === TEMPLATE_CUA_SAMPLE
+      ? "kernel deploy index.ts --env OPENAI_API_KEY=XXX"
       : language === LANGUAGE_PYTHON && (template === TEMPLATE_SAMPLE_APP || template === TEMPLATE_ADVANCED_SAMPLE)
       ? "kernel deploy main.py"
       : language === LANGUAGE_PYTHON && template === TEMPLATE_BROWSER_USE

diff --git a/templates/python/browser-use/main.py b/templates/python/browser-use/main.py
@@ -13,7 +13,130 @@ class TaskInput(TypedDict):
 
 # LLM API Keys are set in the environment during `kernel deploy <filename> -e OPENAI_API_KEY=XXX`
 # See https://docs.onkernel.com/launch/deploy#environment-variables
-llm = ChatOpenAI(model="gpt-4o")
+llm = ChatOpenAI(model="gpt-4o-mini")
+
+
+# Define a subclass of BrowserSession that overrides _setup_viewports (which mishandles resizing when connecting via CDP)
+class BrowserSessionCustomResize(BrowserSession):
+    async def _setup_viewports(self) -> None:
+        """Resize any existing page viewports to match the configured size, set up storage_state, permissions, geolocation, etc."""
+
+        assert self.browser_context, 'BrowserSession.browser_context must already be set up before calling _setup_viewports()'
+
+        self.browser_profile.window_size = {"width": 1024, "height": 786}
+        self.browser_profile.viewport = {"width": 1024, "height": 786}
+        self.browser_profile.screen = {"width": 1024, "height": 786}
+        self.browser_profile.device_scale_factor = 1.0
+
+        # log the viewport settings to terminal
+        viewport = self.browser_profile.viewport
+        print(
+            '📐 Setting up viewport: '
+            + f'headless={self.browser_profile.headless} '
+            + (
+                f'window={self.browser_profile.window_size["width"]}x{self.browser_profile.window_size["height"]}px '
+                if self.browser_profile.window_size
+                else '(no window) '
+            )
+            + (
+                f'screen={self.browser_profile.screen["width"]}x{self.browser_profile.screen["height"]}px '
+                if self.browser_profile.screen
+                else ''
+            )
+            + (f'viewport={viewport["width"]}x{viewport["height"]}px ' if viewport else '(no viewport) ')
+            + f'device_scale_factor={self.browser_profile.device_scale_factor or 1.0} '
+            + f'is_mobile={self.browser_profile.is_mobile} '
+            + (f'color_scheme={self.browser_profile.color_scheme.value} ' if self.browser_profile.color_scheme else '')
+            + (f'locale={self.browser_profile.locale} ' if self.browser_profile.locale else '')
+            + (f'timezone_id={self.browser_profile.timezone_id} ' if self.browser_profile.timezone_id else '')
+            + (f'geolocation={self.browser_profile.geolocation} ' if self.browser_profile.geolocation else '')
+            + (f'permissions={",".join(self.browser_profile.permissions or ["<none>"])} ')
+        )
+
+        # if we have any viewport settings in the profile, make sure to apply them to the entire browser_context as defaults
+        if self.browser_profile.permissions:
+            try:
+                await self.browser_context.grant_permissions(self.browser_profile.permissions)
+            except Exception as e:
+                self.logger.warning(
+                    f'⚠️ Failed to grant browser permissions {self.browser_profile.permissions}: {type(e).__name__}: {e}'
+                )
+        try:
+            if self.browser_profile.default_timeout:
+                self.browser_context.set_default_timeout(self.browser_profile.default_timeout)
+            if self.browser_profile.default_navigation_timeout:
+                self.browser_context.set_default_navigation_timeout(self.browser_profile.default_navigation_timeout)
+        except Exception as e:
+            self.logger.warning(
+                f'⚠️ Failed to set playwright timeout settings '
+                f'cdp_api={self.browser_profile.default_timeout} '
+                f'navigation={self.browser_profile.default_navigation_timeout}: {type(e).__name__}: {e}'
+            )
+        try:
+            if self.browser_profile.extra_http_headers:
+                self.browser_context.set_extra_http_headers(self.browser_profile.extra_http_headers)
+        except Exception as e:
+            self.logger.warning(
+                f'⚠️ Failed to setup playwright extra_http_headers: {type(e).__name__}: {e}'
+            )  # dont print the secret header contents in the logs!
+
+        try:
+            if self.browser_profile.geolocation:
+                await self.browser_context.set_geolocation(self.browser_profile.geolocation)
+        except Exception as e:
+            self.logger.warning(
+                f'⚠️ Failed to update browser geolocation {self.browser_profile.geolocation}: {type(e).__name__}: {e}'
+            )
+
+        await self.load_storage_state()
+
+        page = None
+
+        for page in self.browser_context.pages:
+            # apply viewport size settings to any existing pages
+            if viewport:
+                await page.set_viewport_size(viewport)
+
+            # show browser-use dvd screensaver-style bouncing loading animation on any about:blank pages
+            if page.url == 'about:blank':
+                await self._show_dvd_screensaver_loading_animation(page)
+
+        page = page or (await self.browser_context.new_page())
+
+        if (not viewport) and (self.browser_profile.window_size is not None) and not self.browser_profile.headless:
+            # attempt to resize the actual browser window
+
+            # cdp api: https://chromedevtools.github.io/devtools-protocol/tot/Browser/#method-setWindowBounds
+            try:
+                cdp_session = await page.context.new_cdp_session(page)
+                window_id_result = await cdp_session.send('Browser.getWindowForTarget')
+                await cdp_session.send(
+                    'Browser.setWindowBounds',
+                    {
+                        'windowId': window_id_result['windowId'],
+                        'bounds': {
+                            **self.browser_profile.window_size,
+                            'windowState': 'normal',  # Ensure window is not minimized/maximized
+                        },
+                    },
+                )
+                await cdp_session.detach()
+            except Exception as e:
+                _log_size = lambda size: f'{size["width"]}x{size["height"]}px'
+                try:
+                    # fallback to javascript resize if cdp setWindowBounds fails
+                    await page.evaluate(
+                        """(width, height) => {window.resizeTo(width, height)}""",
+                        **self.browser_profile.window_size,
+                    )
+                    return
+                except Exception as e:
+                    pass
+
+                self.logger.warning(
+                    f'⚠️ Failed to resize browser window to {_log_size(self.browser_profile.window_size)} using CDP setWindowBounds: {type(e).__name__}: {e}'
+                )
+
 
 @app.action("bu-task")
 async def bu_task(ctx: kernel.KernelContext, input_data: TaskInput):
@@ -37,7 +160,7 @@ async def bu_task(ctx: kernel.KernelContext, input_data: TaskInput):
         #task="Compare the price of gpt-4o and DeepSeek-V3",
         task=input_data["task"],
         llm=llm,
-        browser_session=BrowserSession(cdp_url=kernel_browser.cdp_ws_url)
+        browser_session=BrowserSessionCustomResize(cdp_url=kernel_browser.cdp_ws_url)
     )
     result = await agent.run()
     if result.final_result() is not None:

diff --git a/templates/typescript/cua-sample/.gitignore b/templates/typescript/cua-sample/.gitignore
@@ -0,0 +1,2 @@
+node_modules
+bun.lockb
diff --git a/templates/typescript/cua-sample/.prettierignore b/templates/typescript/cua-sample/.prettierignore
@@ -0,0 +1 @@
+node_modules
diff --git a/templates/typescript/cua-sample/.prettierrc b/templates/typescript/cua-sample/.prettierrc
@@ -0,0 +1,4 @@
+{
+	"tabWidth": 1,
+	"useTabs": true
+}
diff --git a/templates/typescript/cua-sample/README.md b/templates/typescript/cua-sample/README.md
@@ -0,0 +1,8 @@
+# Kernel Typescript Sample App - CUA
+
+This is a Kernel application that demonstrates using the Computer Using Agent (CUA) from OpenAI.
+
+It generally follows the [OpenAI CUA Sample App Reference](https://github.com/openai/openai-cua-sample-app) and uses Playwright via Kernel for browser automation.
+It also uses the latest OpenAI SDK format and includes local equivalents of the Kernel methods, so you can test locally before deploying on Kernel.
+
+See the [docs](https://docs.onkernel.com/quickstart) for information.
diff --git a/templates/typescript/cua-sample/index.ts b/templates/typescript/cua-sample/index.ts
@@ -0,0 +1,110 @@
+// @ts-nocheck
+
+import "dotenv/config";
+import { Kernel, type KernelContext } from "@onkernel/sdk";
+import { chromium } from "playwright";
+import { Agent } from "./lib/agent";
+import computers from "./lib/computers";
+
+// Kernel SDK client and the app under which this file's actions are registered.
+const kernel = new Kernel();
+const app = kernel.app("ts-cua");
+
+// LLM API Keys are set in the environment during `kernel deploy <filename> -e OPENAI_API_KEY=XXX`
+// See https://docs.onkernel.com/launch/deploy#environment-variables
+// Fail fast at module load if the key is missing.
+if (!process.env.OPENAI_API_KEY) throw new Error('OPENAI_API_KEY is not set');
+
+/** Payload accepted by the `agent-run` action. */
+interface CuaInput {
+	/** Natural-language task for the agent to carry out. */
+	query: string;
+}
+
+/** Result returned by the `agent-run` action. */
+interface CuaOutput {
+	/** Elapsed wall-clock time in seconds, rounded to two decimals. */
+	elapsed: number;
+	/** Full messages stack from the agent run; omitted from the result by default. */
+	response?: Array<object>;
+	/** Text of the first content part of the last message, or null if absent. */
+	answer: string | null;
+}
+
+/**
+ * Example app that runs an agent using the OpenAI CUA model.
+ * Args:
+ *     ctx: Kernel context containing invocation information
+ *     payload: An object with a non-empty `query` string property
+ * Returns:
+ *     An answer to the query, elapsed time and optionally the messages stack
+ * Throws:
+ *     Error if `payload.query` is missing or empty
+ * Invoke this via CLI:
+ *  export KERNEL_API_KEY=<your_api_key>
+ *  kernel deploy index.ts -e OPENAI_API_KEY=XXXXX --force
+ *  kernel invoke ts-cua agent-run -p "{\"query\":\"current market price range for a used dreamcast\"}"
+ *  kernel logs ts-cua -f # Open in separate tab
+ */
+app.action<CuaInput, CuaOutput>(
+	"agent-run",
+	async (ctx: KernelContext, payload?: CuaInput): Promise<CuaOutput> => {
+		// Validate before creating a browser so a bad invocation fails early and clearly.
+		const query = payload?.query;
+		if (typeof query !== "string" || query.trim() === "") {
+			throw new Error("payload.query is required and must be a non-empty string");
+		}
+
+		const startTime = Date.now();
+		const kernelBrowser = await kernel.browsers.create({
+			invocation_id: ctx.invocation_id,
+		});
+		console.log(
+			"> Kernel browser live view url: ",
+			kernelBrowser.browser_live_view_url,
+		);
+
+		// kernel browser
+		// Note: KernelPlaywrightComputer handles browser cleanup internally,
+		// so there is no manual browser close here.
+		const { computer } = await computers.create({
+			type: "kernel",
+			cdp_ws_url: kernelBrowser.cdp_ws_url,
+		});
+
+		// setup agent
+		const agent = new Agent(
+			"computer-use-preview",
+			computer,
+			[], // additional tools
+			(message: string) => {
+				console.log(`> safety check: ${message}`);
+				return true; // Auto-acknowledge all safety checks for testing
+			},
+		);
+
+		// Take one timestamp so the ISO string and weekday describe the same instant.
+		const now = new Date();
+		const weekday = now.toLocaleDateString("en-US", { weekday: "long" });
+
+		// start agent run
+		const response = await agent.runFullTurn(
+			[
+				{
+					role: "system",
+					content: `- Current date and time: ${now.toISOString()} (${weekday})`,
+				},
+				{
+					type: "message",
+					role: "user",
+					content: [
+						{
+							type: "input_text",
+							text: query,
+							// text: "go to https://news.ycombinator.com , open top article , describe the target website design (in yaml format)"
+						},
+					],
+				},
+			],
+			true, // print_steps
+			true, // debug
+			false, // show_images
+		);
+
+		console.log("> agent run done");
+
+		const timeElapsed = (Date.now() - startTime) / 1000; // Convert to seconds
+
+		return {
+			// response, // full messages stack trace
+			elapsed: parseFloat(timeElapsed.toFixed(2)),
+			answer: response?.slice(-1)?.[0]?.content?.[0]?.text ?? null,
+		};
+	},
+);
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		node_modules
Copy link Contributor juecd Jun 19, 2025 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Rename the folder /cua-sample/ to just `/cua/` so it matches other examples
		bun.lockb