diff --git a/containers/docker/.gitignore b/containers/docker/.gitignore new file mode 100644 index 00000000..36a72f60 --- /dev/null +++ b/containers/docker/.gitignore @@ -0,0 +1,2 @@ +bin/ +recordings/ diff --git a/containers/docker/Dockerfile b/containers/docker/Dockerfile index 231287cc..c950b54a 100644 --- a/containers/docker/Dockerfile +++ b/containers/docker/Dockerfile @@ -2,11 +2,15 @@ FROM ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest USER root RUN add-apt-repository -y ppa:xtradeb/apps -RUN apt update -y && apt install -y chromium ncat +RUN apt update -y && apt install -y chromium ncat ffmpeg # Switch to computeruse user USER computeruse +# copy the kernel-images API binary built by build.sh +COPY bin/kernel-images-api /usr/local/bin/kernel-images-api +ENV WITH_KERNEL_IMAGES_API=false + # Modify entrypoint script # The original can be found here: https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/image/entrypoint.sh COPY --chmod=0755 <<'EOL' /home/computeruse/entrypoint.sh @@ -21,10 +25,15 @@ cleanup () { echo "Cleaning up..." kill -TERM $pid kill -TERM $pid2 + # Kill API pid if set + if [[ -n "${pid3:-}" ]]; then + kill -TERM $pid3 || true + fi } trap cleanup TERM INT pid= pid2= +pid3= INTERNAL_PORT=9223 CHROME_PORT=9222 # External port mapped in Docker echo "Starting Chromium on internal port $INTERNAL_PORT" @@ -40,6 +49,25 @@ ncat \ ./novnc_startup.sh >&2 +if [[ "${WITH_KERNEL_IMAGES_API:-}" == "true" ]]; then + echo "✨ Starting kernel-images API." 
+ + API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" + API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" + API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" + API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" + API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" + + mkdir -p "$API_OUTPUT_DIR" + + PORT="$API_PORT" \ + FRAME_RATE="$API_FRAME_RATE" \ + DISPLAY_NUM="$API_DISPLAY_NUM" \ + MAX_SIZE_MB="$API_MAX_SIZE_MB" \ + OUTPUT_DIR="$API_OUTPUT_DIR" \ + /usr/local/bin/kernel-images-api & pid3=$! +fi + python http_server.py >&2 & STREAMLIT_SERVER_PORT=8501 python -m streamlit run computer_use_demo/streamlit.py >&2 diff --git a/containers/docker/build.sh b/containers/docker/build.sh new file mode 100755 index 00000000..c0d9485d --- /dev/null +++ b/containers/docker/build.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +set -ex -o pipefail + +# Move to the script's directory so relative paths work regardless of the caller CWD +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +cd "$SCRIPT_DIR" + +IMAGE="${IMAGE:-onkernel/kernel-cu-test:latest}" + +source ../../shared/start-buildkit.sh + +# Build the kernel-images API binary and place it into ./bin for Docker build context +source ../../shared/build-server.sh "$(pwd)/bin" + +# Build the Docker image and tag it as $IMAGE (this script does not push). +docker build -t "$IMAGE" . 
diff --git a/containers/docker/run.sh b/containers/docker/run.sh new file mode 100755 index 00000000..b82871a3 --- /dev/null +++ b/containers/docker/run.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +set -ex -o pipefail + +# Move to the script's directory so relative paths work regardless of the caller CWD +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +cd "$SCRIPT_DIR" + +IMAGE="${IMAGE:-onkernel/kernel-cu-test:latest}" +NAME="${NAME:-kernel-cu-test}" + +# Directory on host where recordings will be saved +HOST_RECORDINGS_DIR="$SCRIPT_DIR/recordings" +mkdir -p "$HOST_RECORDINGS_DIR" + +# Build docker run argument list +RUN_ARGS=( + --name "$NAME" + --privileged + --tmpfs /dev/shm:size=2g + -v "$HOST_RECORDINGS_DIR:/recordings" + --memory 8192m + -p 9222:9222 \ + -e DISPLAY_NUM=1 \ + -e HEIGHT=768 \ + -e WIDTH=1024 \ + -e CHROMIUM_FLAGS="--no-sandbox --disable-dev-shm-usage --disable-gpu --start-maximized --disable-software-rasterizer --remote-allow-origins=* --no-zygote" +) + +if [[ "${WITH_KERNEL_IMAGES_API:-}" == "true" ]]; then + RUN_ARGS+=( -p 444:10001 ) + RUN_ARGS+=( -e WITH_KERNEL_IMAGES_API=true ) +fi + +# noVNC vs WebRTC port mapping +if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then + echo "Running container with WebRTC" + RUN_ARGS+=( -p 443:8080 ) + RUN_ARGS+=( -e ENABLE_WEBRTC=true ) + [[ -n "${NEKO_ICESERVERS:-}" ]] && RUN_ARGS+=( -e NEKO_ICESERVERS="$NEKO_ICESERVERS" ) +else + echo "Running container with noVNC" + RUN_ARGS+=( -p 443:6080 ) +fi + +docker rm -f "$NAME" 2>/dev/null || true +docker run -d "${RUN_ARGS[@]}" "$IMAGE" diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go index ec52a9ce..536e3b20 100644 --- a/server/cmd/api/main.go +++ b/server/cmd/api/main.go @@ -9,6 +9,7 @@ import ( "os/exec" "os/signal" "syscall" + "time" "github.com/ghodss/yaml" "github.com/go-chi/chi/v5" @@ -101,13 +102,15 @@ func main() { <-ctx.Done() slogger.Info("shutdown signal received") - g, _ := errgroup.WithContext(ctx) + shutdownCtx, shutdownCancel := 
context.WithTimeout(context.Background(), 10*time.Second) + defer shutdownCancel() + g, _ := errgroup.WithContext(shutdownCtx) g.Go(func() error { - return srv.Shutdown(context.Background()) + return srv.Shutdown(shutdownCtx) }) g.Go(func() error { - return apiService.Shutdown(ctx) + return apiService.Shutdown(shutdownCtx) }) if err := g.Wait(); err != nil { diff --git a/server/lib/recorder/ffmpeg.go b/server/lib/recorder/ffmpeg.go index bf7e5e92..05fbc952 100644 --- a/server/lib/recorder/ffmpeg.go +++ b/server/lib/recorder/ffmpeg.go @@ -153,7 +153,7 @@ func (fr *FFmpegRecorder) Start(ctx context.Context) error { go fr.waitForCommand(ctx) // Check for startup errors before returning - if err := waitForChan(ctx, 500*time.Millisecond, fr.exited); err == nil { + if err := waitForChan(ctx, 250*time.Millisecond, fr.exited); err == nil { fr.mu.Lock() defer fr.mu.Unlock() return fmt.Errorf("failed to start ffmpeg process: %w", fr.ffmpegErr) @@ -165,16 +165,17 @@ func (fr *FFmpegRecorder) Start(ctx context.Context) error { // Stop gracefully stops the recording using a multi-phase shutdown process. func (fr *FFmpegRecorder) Stop(ctx context.Context) error { return fr.shutdownInPhases(ctx, []shutdownPhase{ - {"interrupt", []syscall.Signal{syscall.SIGCONT, syscall.SIGINT}, 5 * time.Second, "graceful stop"}, - {"terminate", []syscall.Signal{syscall.SIGTERM}, 2 * time.Second, "forceful termination"}, - {"kill", []syscall.Signal{syscall.SIGKILL}, 1 * time.Second, "immediate kill"}, + {"wake_and_interrupt", []syscall.Signal{syscall.SIGCONT, syscall.SIGINT}, 5 * time.Second, "graceful stop"}, + {"retry_interrupt", []syscall.Signal{syscall.SIGINT}, 3 * time.Second, "retry graceful stop"}, + {"terminate", []syscall.Signal{syscall.SIGTERM}, 250 * time.Millisecond, "forceful termination"}, + {"kill", []syscall.Signal{syscall.SIGKILL}, 100 * time.Millisecond, "immediate kill"}, }) } // ForceStop immediately terminates the recording process. 
func (fr *FFmpegRecorder) ForceStop(ctx context.Context) error { return fr.shutdownInPhases(ctx, []shutdownPhase{ - {"kill", []syscall.Signal{syscall.SIGKILL}, 1 * time.Second, "immediate kill"}, + {"kill", []syscall.Signal{syscall.SIGKILL}, 100 * time.Millisecond, "immediate kill"}, }) } @@ -321,6 +322,7 @@ func (fr *FFmpegRecorder) shutdownInPhases(ctx context.Context, phases []shutdow pgid := -cmd.Process.Pid // negative PGID targets the whole group for _, phase := range phases { + phaseStartTime := time.Now() // short circuit: the process exited before this phase started. select { case <-done: @@ -331,12 +333,16 @@ func (fr *FFmpegRecorder) shutdownInPhases(ctx context.Context, phases []shutdow log.Info("ffmpeg shutdown phase", "phase", phase.name, "desc", phase.desc) // Send the phase's signals in order. - for _, sig := range phase.signals { + for idx, sig := range phase.signals { _ = syscall.Kill(pgid, sig) // ignore error; process may have gone away + // arbitrary delay between signals, but not after the last signal + if idx < len(phase.signals)-1 { + time.Sleep(100 * time.Millisecond) + } } // Wait for exit or timeout - if err := waitForChan(ctx, phase.timeout, done); err == nil { + if err := waitForChan(ctx, phase.timeout-time.Since(phaseStartTime), done); err == nil { log.Info("ffmpeg shutdown successful", "phase", phase.name) return nil } diff --git a/shared/build-server.sh b/shared/build-server.sh new file mode 100644 index 00000000..e4fa84cd --- /dev/null +++ b/shared/build-server.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# build-server.sh +# ------------------------- +# Usage (source or execute): +# build-server.sh [dest-dir] [goos] [goarch] +# +# dest-dir (optional) Directory to place the resulting binary. Defaults to ./bin +# goos (optional) Target GOOS for cross-compilation. Defaults to linux +# goarch (optional) Target GOARCH for cross-compilation. 
Defaults to amd64 +# +# Examples +# source ../../shared/build-server.sh # → ./bin, linux/amd64 +# ../../shared/build-server.sh ./bin linux arm64 # → linux/arm64 +# ../../shared/build-server.sh ./out darwin arm64 # → darwin/arm64 +set -euo pipefail + +DEST_DIR="${1:-./bin}" +# Optional os/arch parameters +TARGET_OS="${2:-linux}" +TARGET_ARCH="${3:-amd64}" + +# Resolve repo root as the parent directory of this script's directory +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +# 1. Build the binary in the server module +pushd "$REPO_ROOT/server" >/dev/null +GOOS="$TARGET_OS" GOARCH="$TARGET_ARCH" CGO_ENABLED=0 make build +popd >/dev/null + +# 2. Copy to destination +mkdir -p "$DEST_DIR" +cp "$REPO_ROOT/server/bin/api" "$DEST_DIR/kernel-images-api" + +echo "✅ kernel-images-api binary copied to $DEST_DIR/kernel-images-api" diff --git a/unikernels/unikraft-cu/.gitignore b/unikernels/unikraft-cu/.gitignore new file mode 100644 index 00000000..e660fd93 --- /dev/null +++ b/unikernels/unikraft-cu/.gitignore @@ -0,0 +1 @@ +bin/ diff --git a/unikernels/unikraft-cu/Dockerfile b/unikernels/unikraft-cu/Dockerfile index 393747bf..531e0a63 100644 --- a/unikernels/unikraft-cu/Dockerfile +++ b/unikernels/unikraft-cu/Dockerfile @@ -50,6 +50,8 @@ RUN apt-get update && \ sudo \ mutter \ x11vnc \ + # Recording tools + ffmpeg \ # Python/pyenv reqs build-essential \ libssl-dev \ @@ -150,5 +152,9 @@ COPY --from=xorg-deps /usr/local/lib/xorg/modules/input/neko_drv.so /usr/lib/xor COPY image-chromium/ / COPY ./wrapper.sh /wrapper.sh +# copy the kernel-images API binary built by build.sh +COPY bin/kernel-images-api /usr/local/bin/kernel-images-api +ENV WITH_KERNEL_IMAGES_API=false + ENTRYPOINT [ "/wrapper.sh" ] diff --git a/unikernels/unikraft-cu/build.sh b/unikernels/unikraft-cu/build.sh index 85759f58..5af580c5 100755 --- a/unikernels/unikraft-cu/build.sh +++ b/unikernels/unikraft-cu/build.sh @@ -10,6 +10,9 @@ 
if [ -z "$UKC_TOKEN" ] || [ -z "$UKC_METRO" ]; then fi source ../../shared/start-buildkit.sh +# Build the API binary +source ../../shared/build-server.sh "$(pwd)/bin" + kraft pkg \ --name index.unikraft.io/$image \ --plat kraftcloud --arch x86_64 \ diff --git a/unikernels/unikraft-cu/run.sh b/unikernels/unikraft-cu/run.sh index f56941cb..3fcb4d50 100755 --- a/unikernels/unikraft-cu/run.sh +++ b/unikernels/unikraft-cu/run.sh @@ -15,6 +15,11 @@ deploy_args=( -n "$name" ) +if [[ "${WITH_KERNEL_IMAGES_API:-}" == "true" ]]; then + deploy_args+=( -p 444:10001/tls ) + deploy_args+=( -e WITH_KERNEL_IMAGES_API=true ) +fi + kraft cloud inst rm $name || true if [[ "${ENABLE_WEBRTC:-}" == "true" ]]; then diff --git a/unikernels/unikraft-cu/wrapper.sh b/unikernels/unikraft-cu/wrapper.sh index 106f6001..3ff1b922 100755 --- a/unikernels/unikraft-cu/wrapper.sh +++ b/unikernels/unikraft-cu/wrapper.sh @@ -28,10 +28,15 @@ cleanup () { echo "Cleaning up..." kill -TERM $pid kill -TERM $pid2 + # Kill the API server if it was started + if [[ -n "${pid3:-}" ]]; then + kill -TERM $pid3 || true + fi } trap cleanup TERM INT pid= pid2= +pid3= INTERNAL_PORT=9223 CHROME_PORT=9222 # External port mapped to host echo "Starting Chromium on internal port $INTERNAL_PORT" @@ -54,5 +59,24 @@ else echo "✨ noVNC demo is ready to use!" fi +if [[ "${WITH_KERNEL_IMAGES_API:-}" == "true" ]]; then + echo "✨ Starting kernel-images API." + + API_PORT="${KERNEL_IMAGES_API_PORT:-10001}" + API_FRAME_RATE="${KERNEL_IMAGES_API_FRAME_RATE:-10}" + API_DISPLAY_NUM="${KERNEL_IMAGES_API_DISPLAY_NUM:-${DISPLAY_NUM:-1}}" + API_MAX_SIZE_MB="${KERNEL_IMAGES_API_MAX_SIZE_MB:-500}" + API_OUTPUT_DIR="${KERNEL_IMAGES_API_OUTPUT_DIR:-/recordings}" + + mkdir -p "$API_OUTPUT_DIR" + + PORT="$API_PORT" \ + FRAME_RATE="$API_FRAME_RATE" \ + DISPLAY_NUM="$API_DISPLAY_NUM" \ + MAX_SIZE_MB="$API_MAX_SIZE_MB" \ + OUTPUT_DIR="$API_OUTPUT_DIR" \ + /usr/local/bin/kernel-images-api & pid3=$! +fi + # Keep the container running tail -f /dev/null