diff --git a/containers/docker/.gitignore b/containers/docker/.gitignore deleted file mode 100644 index 36a72f60..00000000 --- a/containers/docker/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -bin/ -recordings/ diff --git a/containers/docker/Dockerfile b/containers/docker/Dockerfile deleted file mode 100644 index c950b54a..00000000 --- a/containers/docker/Dockerfile +++ /dev/null @@ -1,77 +0,0 @@ -FROM ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest - -USER root -RUN add-apt-repository -y ppa:xtradeb/apps -RUN apt update -y && apt install -y chromium ncat ffmpeg - -# Switch to computeruse user -USER computeruse - -# copy the kernel-images API binary built by build.sh -COPY bin/kernel-images-api /usr/local/bin/kernel-images-api -ENV WITH_KERNEL_IMAGES_API=false - -# Modify entrypoint script -# The original can be found here: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/image/entrypoint.sh -COPY --chmod=0755 <<'EOL' /home/computeruse/entrypoint.sh -#!/bin/bash -set -o pipefail -o errexit -o nounset - -./start_all.sh >&2 - -# Start Chromium with display :1 and remote debugging -# Use ncat to listen on 0.0.0.0:9222 since chromium does not let you listen on 0.0.0.0 anymore: https://github.com/pyppeteer/pyppeteer/pull/379#issuecomment-217029626 -cleanup () { - echo "Cleaning up..." - kill -TERM $pid - kill -TERM $pid2 - # Kill API pid if set - if [[ -n "${pid3:-}" ]]; then - kill -TERM $pid3 || true - fi -} -trap cleanup TERM INT -pid= -pid2= -pid3= -INTERNAL_PORT=9223 -CHROME_PORT=9222 # External port mapped in Docker -echo "Starting Chromium on internal port $INTERNAL_PORT" -DISPLAY=:1 chromium \ - --remote-debugging-port=$INTERNAL_PORT \ - ${CHROMIUM_FLAGS:-} >&2 & - -echo "Setting up ncat proxy on port $CHROME_PORT" -ncat \ - --sh-exec "ncat localhost $INTERNAL_PORT" \ - -l "$CHROME_PORT" \ - --keep-open & pid2=$! - -./novnc_startup.sh >&2 - -if [[ "${WITH_KERNEL_IMAGES_API:-}" == "true" ]]; then - echo "✨ Starting kernel-images API." - - API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" - API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" - API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-1}" - API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" - API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" - - mkdir -p "$API_OUTPUT_DIR" - - PORT="$API_PORT" \ - FRAME_RATE="$API_FRAME_RATE" \ - DISPLAY_NUM="$API_DISPLAY_NUM" \ - MAX_SIZE_MB="$API_MAX_SIZE_MB" \ - OUTPUT_DIR="$API_OUTPUT_DIR" \ - /usr/local/bin/kernel-images-api & pid3=$! -fi - -python http_server.py >&2 & - -STREAMLIT_SERVER_PORT=8501 python -m streamlit run computer_use_demo/streamlit.py >&2 -EOL - -WORKDIR /home/computeruse -ENTRYPOINT ["./entrypoint.sh"] diff --git a/containers/docker/README.md b/containers/docker/README.md deleted file mode 100644 index f946cf08..00000000 --- a/containers/docker/README.md +++ /dev/null @@ -1,94 +0,0 @@ -# 🐋 Chromium x Docker - -This Dockerfile extends Anthropic's [Computer Use reference implementation](https://github.com/anthropics/anthropic-quickstarts/tree/main/computer-use-demo) by: (1) installing headful Chromium (2) Exposing Chromium's port `9222` so Chrome DevTools Protocol-based frameworks (Playwright, Puppeteer) can connect to it. - -## 1. Build From the Source - -```bash -git clone https://github.com/onkernel/kernel-images.git -cd kernel-images -docker build -t onkernel/kernel-chromium:latest -f containers/docker/Dockerfile . -``` - -## 2. Run the Container - -```bash -docker run -e CHROMIUM_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-zygote" -p 8501:8501 -p 8080:8080 -p 6080:6080 -p 9222:9222 kernel-chromium -``` - -This exposes three ports: - -- `8080`: Anthropic's Computer Use web application, which includes a chat interface and remote GUI -- `6080`: NoVNC interface for visual monitoring via browser-based VNC client -- `9222`: Chrome DevTools Protocol for browser automation via Playwright and Puppeteer -- `8501`: Streamlit interfaced used by Computer Use - -## Live View Configuration -You can set the browser width and height with the environment variables `WIDTH` and `HEIGHT`, and control Chromium startup flags with `CHROMIUM_FLAGS` in the `docker run` command: - -```bash -docker run -e WIDTH=1920 -e HEIGHT=1080 \ - -e CHROMIUM_FLAGS="--start-maximized --disable-gpu" \ - -p 8501:8501 -p 8080:8080 -p 6080:6080 -p 9222:9222 kernel-chromium -``` - -## 👾 Connect via Chrome DevTools Protocol - -We expose port `9222` via ncat, allowing you to connect Chrome DevTools Protocol-based browser frameworks like Playwright and Puppeteer (and CDP-based SDKs like Browser Use). You can use these frameworks to drive the browser in the cloud. - -First, fetch the browser's CDP websocket endpoint: - -```typescript -const url = "http://localhost:9222/json/version"; -const response = await fetch(url); -if (response.status !== 200) { - throw new Error( - `Failed to retrieve browser instance: ${ - response.statusText - } ${await response.text()}` - ); -} -const { webSocketDebuggerUrl } = await response.json(); -``` - -Then, connect a remote Playwright or Puppeteer client to it: - -```typescript -const browser = await puppeteer.connect({ - browserWSEndpoint: webSocketDebuggerUrl, -}); -``` - -or: - -```typescript -const browser = await chromium.connectOverCDP(webSocketDebuggerUrl); -``` - -## 🧑‍💻 Connect via remote GUI (noVNC) - -For visual monitoring, access the browser via NoVNC by opening: - -```bash -http://localhost:6080/vnc.html -``` - -## 🛜 Connect via Anthropic Computer Use's web app - -For a unified interface that includes Anthropic Computer Use's chat (via Streamlit) plus GUI (via noVNC), visit: - -```bash -http://localhost:8080 -``` - -## 🤝 License & Contributing -See [here](/README.md) for license and contributing details. - -## 🏅 Join our Team -We're hiring exceptional senior and staff backend engineers to work on the future of AI infrastructure. Full-time or contract-to-hire. Join a small team that punches well above its weight, minimal meetings and no bureaucracy. By developers, for developers. - -$175k-200k + equity + great healthcare. Remote in the continental US. - -Things we're working on: serverless, containers/vms/unikernels, streaming, SDKs, CLIs. - -Message `catherine jue` on [Discord](https://discord.gg/FBrveQRcud) with what you've been building lately. diff --git a/unikernels/unikraft-cu/.gitignore b/unikernels/unikraft-cu/.gitignore index e660fd93..c249e748 100644 --- a/unikernels/unikraft-cu/.gitignore +++ b/unikernels/unikraft-cu/.gitignore @@ -1 +1,2 @@ bin/ +recording/ diff --git a/unikernels/unikraft-cu/Dockerfile b/unikernels/unikraft-cu/Dockerfile index 531e0a63..c00157dc 100644 --- a/unikernels/unikraft-cu/Dockerfile +++ b/unikernels/unikraft-cu/Dockerfile @@ -152,7 +152,7 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xor COPY image-chromium/ / COPY ./wrapper.sh /wrapper.sh -# copy the kernel-images API binary built by build.sh +# copy the kernel-images API binary built externally COPY bin/kernel-images-api /usr/local/bin/kernel-images-api ENV WITH_KERNEL_IMAGES_API=false diff --git a/unikernels/unikraft-cu/README.md b/unikernels/unikraft-cu/README.md index 9e1f75b8..db9a4a20 100644 --- a/unikernels/unikraft-cu/README.md +++ b/unikernels/unikraft-cu/README.md @@ -12,10 +12,10 @@ This deploys headful Chromium on a unikernel. It also exposes a remote GUI throu `export UKC_METRO= and UKC_TOKEN=` ## 3. Build the image -`./build.sh` +`./build-unikernel.sh` ## 4. Run it -`./run.sh` +`./run-unikernel.sh` When the deployment finishes successfully, the Kraft CLI will print out something like this: ``` @@ -36,7 +36,7 @@ Deployed successfully! ### 3.1 Deploy the Implementation with WebRTC desktop streaming enabled ```sh -ENABLE_WEBRTC=true NEKO_ICESERVERS=xxx ./run.sh +ENABLE_WEBRTC=true NEKO_ICESERVERS=xxx ./run-unikernel.sh ``` `NEKO_ICESERVERS` @@ -111,17 +111,8 @@ const browser = await chromium.connectOverCDP(finalWSUrl); You can also run the Dockerfile directly as a docker container: ```sh -docker build -t kernel-docker . -docker run -d \ - -p 8080:8080 \ - -p 9222:9222 \ - --cap-add SYS_ADMIN \ - -p 56000-56100:56000-56100/udp \ - -e ENABLE_WEBRTC=true \ - -e CHROMIUM_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-zygote" \ - -e NEKO_WEBRTC_EPR=56000-56100 \ - -e NEKO_WEBRTC_NAT1TO1=127.0.0.1 \ - kernel-docker +./build-docker.sh +./run-docker.sh ``` ## 📞 WebRTC Notes diff --git a/containers/docker/build.sh b/unikernels/unikraft-cu/build-docker.sh similarity index 100% rename from containers/docker/build.sh rename to unikernels/unikraft-cu/build-docker.sh diff --git a/unikernels/unikraft-cu/build.sh b/unikernels/unikraft-cu/build-unikernel.sh similarity index 100% rename from unikernels/unikraft-cu/build.sh rename to unikernels/unikraft-cu/build-unikernel.sh diff --git a/containers/docker/run.sh b/unikernels/unikraft-cu/run-docker.sh similarity index 100% rename from containers/docker/run.sh rename to unikernels/unikraft-cu/run-docker.sh diff --git a/unikernels/unikraft-cu/run.sh b/unikernels/unikraft-cu/run-unikernel.sh similarity index 100% rename from unikernels/unikraft-cu/run.sh rename to unikernels/unikraft-cu/run-unikernel.sh