kaito-project · sozercan · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
@@ -0,0 +1,129 @@
+name: docker-test-runner-gpu
+
+# Manual trigger only; the backend to exercise is picked from a fixed list.
+on:
+  workflow_dispatch:
+    inputs:
+      backend:
+        # A `choice` input always carries one of the listed options, so the
+        # "test everything" case is the explicit `all` option (the default).
+        description: 'Runner backend to test (select "all" to test every backend)'
+        required: false
+        type: choice
+        default: 'all'
+        options:
+          - all
+          - llama-cpp-cuda
+          - diffusers-cuda
+          - vllm-cuda
+
+permissions: read-all
+
+jobs:
+  # Builds the aikit image and one runner image per matrix entry, then smoke
+  # tests the running container over HTTP on port 8080.
+  test:
+    runs-on: [self-hosted, gpu]
+    timeout-minutes: 240
+    strategy:
+      # A failing backend does not cancel the remaining matrix entries.
+      fail-fast: false
+      # One entry at a time: every entry uses the same container name
+      # (`runner-test`) and host port (8080) in the steps below.
+      max-parallel: 1
+      matrix:
+        # `all` expands to the full backend list; any other selection becomes a
+        # single-element list built from the chosen input value.
+        backend: ${{ inputs.backend == 'all' && fromJson('["llama-cpp-cuda", "diffusers-cuda", "vllm-cuda"]') || fromJson(format('["{0}"]', inputs.backend)) }}
+    steps:
+      # Empty the workspace before checkout. The `.??*` glob matches dotfiles
+      # with at least two characters after the dot, so `.` and `..` are never
+      # touched. Both removals are best-effort.
+      - name: cleanup workspace
+        run: |
+          rm -rf ./* || true
+          rm -rf ./.??* || true
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      # Use the default docker-driver builder together with the containerd
+      # image store so the locally built aikit image can be used.
+      # Both must already be set up on the runner before this test runs.
+      - run: docker buildx use default
+
+      # Build the repository root and load the result locally as aikit:test.
+      - name: build aikit
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --provenance=false --progress plain
+
+      # Build the runner image for the selected backend from
+      # runners/<backend>.yaml and load it locally as runner-test:test.
+      - name: build runner image
+        run: |
+          docker buildx build . -t runner-test:test \
+            -f runners/${{ matrix.backend }}.yaml \
+            --load --provenance=false --progress plain
+
+      - name: list images
+        run: docker images
+
+      # Exactly one of the three steps below runs per matrix entry. Each starts
+      # the runner image detached with all GPUs, publishes port 8080 and passes
+      # a model reference as the container argument. `--rm` removes the
+      # container as soon as it exits.
+      - name: run runner (llama-cpp-cuda)
+        if: matrix.backend == 'llama-cpp-cuda'
+        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test unsloth/gemma-3-1b-it-GGUF
+
+      - name: run runner (diffusers-cuda)
+        if: matrix.backend == 'diffusers-cuda'
+        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test stabilityai/stable-diffusion-2-1
+
+      - name: run runner (vllm-cuda)
+        if: matrix.backend == 'vllm-cuda'
+        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test Qwen/Qwen2.5-0.5B-Instruct
+
+      # Sends one chat completion request (curl retries until the server
+      # answers) and fails the step unless the response has no `error` field
+      # and carries a non-empty `choices` array.
+      - name: run test (llama-cpp-cuda)
+        if: matrix.backend == 'llama-cpp-cuda'
+        run: |
+          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "gemma-3-1b-it-GGUF",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo "$result"
+
+          # `jq .choices` prints the literal `null` for a missing field, so an
+          # emptiness test on its output can never fail; assert the shape instead.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.choices | type) != "array" or (.choices | length) == 0) then
+              error("choices must be a non-empty array")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Requests one generated image and fails the step unless the response has
+      # no `error` field and `data[0].url` is a non-empty string.
+      - name: run test (diffusers-cuda)
+        if: matrix.backend == 'diffusers-cuda'
+        run: |
+          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
+            "model": "stable-diffusion-2-1",
+            "prompt": "A cute baby llama",
+            "size": "256x256"
+          }')
+          echo "$result"
+
+          # `jq .data[0].url` prints the literal `null` for a missing field, so
+          # an emptiness test on its output can never fail; assert the shape.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.data | type) != "array" or (.data | length) == 0) then
+              error("data must be a non-empty array")
+            elif ((.data[0].url | type) != "string" or (.data[0].url | length) == 0) then
+              error("data[0].url must be a non-empty string")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Best-effort copy of the container's /tmp/generated/content/images
+      # directory into /tmp on the runner; a missing directory or container
+      # does not fail the step.
+      - name: save generated image
+        if: matrix.backend == 'diffusers-cuda'
+        run: docker cp runner-test:/tmp/generated/content/images /tmp || true
+
+      # Sends one chat completion request (curl retries until the server
+      # answers) and fails the step unless the response has no `error` field
+      # and carries a non-empty `choices` array.
+      - name: run test (vllm-cuda)
+        if: matrix.backend == 'vllm-cuda'
+        run: |
+          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "Qwen2.5-0.5B-Instruct",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo "$result"
+
+          # `jq .choices` prints the literal `null` for a missing field, so an
+          # emptiness test on its output can never fail; assert the shape instead.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.choices | type) != "array" or (.choices | length) == 0) then
+              error("choices must be a non-empty array")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Best-effort teardown. The container is started with `--rm`, so if it
+      # has already exited it no longer exists and `docker logs` / `docker stop`
+      # fail; `|| true` keeps the remaining cleanup steps from reporting an
+      # extra failure in that case.
+      - name: save logs
+        if: always()
+        run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1 || true
+
+      - run: docker stop runner-test || true
+        if: always()
+
+      # Always remove all unused Docker data (images, containers, volumes) on
+      # the runner without prompting; failures are ignored.
+      - run: docker system prune -a -f --volumes || true
+        if: always()
+
+      # Upload whatever was collected under /tmp, even when earlier steps
+      # failed: the container log, plus PNGs expected from the
+      # `save generated image` step.
+      - name: publish test artifacts
+        if: always()
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: test-runner-${{ matrix.backend }}
+          path: |
+            /tmp/*.log
+            /tmp/images/*.png
@@ -0,0 +1,114 @@
+name: docker-test-runner
+
+# Runs on pushes and pull requests, except when only Markdown files or the
+# website directory changed, and can also be started manually.
+on:
+  push:
+    paths-ignore:
+      - '**.md'
+      - 'website/**'
+  pull_request:
+    paths-ignore:
+      - '**.md'
+      - 'website/**'
+  workflow_dispatch:
+
+permissions: read-all
+
+# One active run per workflow and pull request (or per ref when there is no
+# pull request); a newer run cancels the one in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Builds the aikit image and the llama-cpp-cpu runner image, then smoke
+  # tests the running container over HTTP on port 8080.
+  test:
+    runs-on: ubuntu-latest-16-cores
+    timeout-minutes: 240
+    steps:
+      # NOTE(review): the egress policy is `audit`, so the endpoint list below
+      # presumably documents expected traffic rather than blocking anything
+      # else; confirm against the harden-runner documentation.
+      - name: Harden Runner
+        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
+        with:
+          egress-policy: audit
+          allowed-endpoints: >
+            auth.docker.io:443
+            huggingface.co:443
+            *.huggingface.co:443
+            *.hf.co:443
+            cdn.dl.k8s.io:443
+            dl.k8s.io:443
+            download.docker.com:443
+            gcr.io:443
+            github.com:443
+            *.githubusercontent.com:443
+            production.cloudflare.docker.com:443
+            proxy.golang.org:443
+            registry-1.docker.io:443
+            storage.googleapis.com:443
+            *.blob.core.windows.net:443
+            *.azureedge.net:443
+            developer.download.nvidia.com:443
+            dl-cdn.alpinelinux.org:443
+            *.ubuntu.com:80
+            ghcr.io:443
+            sum.golang.org:443
+            quay.io:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      # The containerd image store is needed for testing locally built images,
+      # so Docker is installed with the containerd snapshotter feature enabled
+      # (and daemon debug logging on).
+      # NOTE(review): `version` uses the action's `version=<tag>` form; confirm
+      # against the action's input documentation when bumping it.
+      - uses: crazy-max/ghaction-setup-docker@1a6edb0ba9ac496f6850236981f15d8f9a82254d # v5.0.0
+        with:
+          version: version=v27.5.1
+          daemon-config: |
+            {
+              "debug": true,
+              "features": {
+                "containerd-snapshotter": true
+              }
+            }
+      - uses: crazy-max/ghaction-github-runtime@04d248b84655b509d8c44dc1d6f990c879747487 # v4.0.0
+
+      # Build the repository root and load the result locally as aikit:test.
+      - name: build aikit
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --provenance=false --progress plain
+
+      # Build the CPU runner image from runners/llama-cpp-cpu.yaml and load it
+      # locally as runner-test:test.
+      - name: build runner image
+        run: |
+          docker buildx build . -t runner-test:test \
+            -f runners/llama-cpp-cpu.yaml \
+            --load --provenance=false --progress plain
+
+      - name: list images
+        run: docker images
+
+      # Start the runner detached on port 8080 with the model reference passed
+      # as the container argument.
+      - name: run runner with model
+        run: docker run --name runner-test -d -p 8080:8080 runner-test:test unsloth/gemma-3-1b-it-GGUF
+
+      # Sends one chat completion request (curl retries until the server
+      # answers) and fails the step unless the response has no `error` field
+      # and carries a non-empty `choices` array.
+      - name: run chat completion test
+        run: |
+          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "gemma-3-1b-it-GGUF",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo "$result"
+
+          # `jq .choices` prints the literal `null` for a missing field, so an
+          # emptiness test on its output can never fail; assert the shape instead.
+          # Both lookups are evaluated against the response root: piping
+          # `.choices` into a second `.choices` would index the array and error.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.choices | type) != "array" or (.choices | length) == 0) then
+              error("choices must be a non-empty array")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Best-effort log capture: when the container was never created (for
+      # example after a failed build) `docker logs` exits non-zero, which would
+      # only add a second failing step on top of the real one.
+      - name: save logs
+        if: always()
+        run: docker logs runner-test > /tmp/docker-runner-llama-cpp-cpu.log 2>&1 || true
+
+      - name: stop container
+        if: always()
+        run: docker stop runner-test || true
+
+      # Upload the captured container log even when earlier steps failed.
+      - name: publish test artifacts
+        if: always()
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: test-runner-llama-cpp-cpu
+          path: |
+            /tmp/*.log
@@ -30,9 +30,17 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
 	base := getBaseImage(c, platform)
 
 	var err error
-	state, merge, err = copyModels(c, base, state, *platform)
-	if err != nil {
-		return state, nil, err
+	if isRunnerMode(c) {
+		// Runner mode: skip model downloads, write config if present, install runner deps
+		state, merge = writeConfig(c, base, state, *platform)
+		state, merge = installRunnerDependencies(c, state, merge, *platform)
+		state, merge = installRunnerEntrypoint(c, state, merge)
+	} else {
+		// Standard mode: download models + write config
+		state, merge, err = copyModels(c, base, state, *platform)
+		if err != nil {
+			return state, nil, err
+		}
 	}
 
 	state, merge, err = addLocalAI(state, merge, *platform)
@@ -63,7 +71,19 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
 	return llb.Image(distrolessBase, llb.Platform(*platform))
 }
 
-// copyModels copies models to the image.
+// writeConfig creates /config.yaml from c.Config when it is non-empty.
+// It returns the resulting state, plus base merged with the diff between the
+// incoming state and the resulting state.
+func writeConfig(c *config.InferenceConfig, base llb.State, s llb.State, platform specs.Platform) (llb.State, llb.State) {
+	initial := s
+	if c.Config != "" {
+		stepName := fmt.Sprintf("Creating config for platform %s/%s", platform.OS, platform.Architecture)
+		writeCmd := utils.Shf("mkdir -p /configuration && echo -n \"%s\" > /config.yaml", c.Config)
+		s = s.Run(writeCmd, llb.WithCustomName(stepName)).Root()
+	}
+	// With an empty config nothing ran, so the diff is taken between two identical states.
+	return s, llb.Merge([]llb.State{base, llb.Diff(initial, s)})
+}
+
+// copyModels copies models to the image and writes the config.
 func copyModels(c *config.InferenceConfig, base llb.State, s llb.State, platform specs.Platform) (llb.State, llb.State, error) {
 	savedState := s
 	for _, model := range c.Models {

@@ -1,6 +1,8 @@
 package inference
 
 import (
+	"strings"
+
 	"github.com/kaito-project/aikit/pkg/aikit/config"
 	"github.com/kaito-project/aikit/pkg/utils"
 	"github.com/moby/buildkit/util/system"
@@ -9,16 +11,35 @@ import (
 
+// NewImageConfig returns the image configuration for c on the given platform,
+// starting from emptyImage. In runner mode the entrypoint is the aikit-runner
+// script with an empty Cmd, and runner labels are attached. Otherwise the
+// entrypoint is local-ai, with --debug and --config-file=/config.yaml added
+// to Cmd when c.Debug and c.Config are set.
 func NewImageConfig(c *config.InferenceConfig, platform *specs.Platform) *specs.Image {
 	img := emptyImage(c, platform)
-	cmd := []string{}
-	if c.Debug {
-		cmd = append(cmd, "--debug")
-	}
-	if c.Config != "" {
-		cmd = append(cmd, "--config-file=/config.yaml")
+
+	if isRunnerMode(c) {
+		// Runner mode: use the aikit-runner entrypoint script
+		img.Config.Entrypoint = []string{"/usr/local/bin/aikit-runner"}
+		img.Config.Cmd = []string{}
+
+		// Add runner labels
+		// NOTE(review): this assignment replaces any labels already present on
+		// img.Config; confirm emptyImage does not set labels that must be kept.
+		backendLabel := strings.Join(c.Backends, ",")
+		img.Config.Labels = map[string]string{
+			"ai.kaito.aikit.runner":  "true",
+			"ai.kaito.aikit.backend": backendLabel,
+		}
+		// The runtime label is only added when a runtime is configured.
+		if c.Runtime != "" {
+			img.Config.Labels["ai.kaito.aikit.runtime"] = c.Runtime
+		}
+	} else {
+		// Standard mode: use local-ai directly
+		cmd := []string{}
+		if c.Debug {
+			cmd = append(cmd, "--debug")
+		}
+		if c.Config != "" {
+			cmd = append(cmd, "--config-file=/config.yaml")
+		}
+
+		img.Config.Entrypoint = []string{"local-ai"}
+		img.Config.Cmd = cmd
 	}
 
-	img.Config.Entrypoint = []string{"local-ai"}
-	img.Config.Cmd = cmd
 	return img
 }