Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
5270b77
Add runtime images without models (#719)
sozercan Mar 10, 2026
8359850
fix: bump golangci-lint to v2.11.2 for Go 1.26 compatibility
sozercan Mar 10, 2026
3ca4c75
Address PR review feedback
sozercan Mar 10, 2026
51123d5
Add release workflow for runner images
sozercan Mar 10, 2026
2e672bf
fix: add GPU detection wrapper for CUDA images to handle CPU fallback
sozercan Mar 10, 2026
1d93939
fix: address review feedback for GPU detection wrapper
sozercan Mar 10, 2026
fe0a276
fix: runner CI frontend, arg parsing, and model cache bugs
sozercan Mar 10, 2026
0996f6f
fix: use bash instead of sh in GPU detection wrapper
sozercan Mar 10, 2026
6837d26
fix: reduce curl retry count and add health check to runner CI
sozercan Mar 10, 2026
88c593b
fix: download single GGUF quantization instead of all variants
sozercan Mar 10, 2026
e6e28e7
fix: use direct GGUF URL in runner CI tests
sozercan Mar 10, 2026
edcccc1
fix: generate model config for downloaded GGUF files in runner
sozercan Mar 10, 2026
0a528e6
fix: add -s flag to curl in runner test to fix jq parsing
sozercan Mar 10, 2026
3c81bf2
fix: pipe curl through tail -1 to get only final response
sozercan Mar 10, 2026
506d12d
fix: write curl response to file to avoid jq parsing issues
sozercan Mar 10, 2026
2c6fe76
fix: use manual retry loop to avoid curl retry output corruption
sozercan Mar 10, 2026
bba6c6b
fix: handle huggingface:// URI scheme from kubeairunway
sozercan Mar 10, 2026
90869bd
docs: add runner images documentation and missing tests
sozercan Mar 10, 2026
98760fe
docs: lead with pre-built runner images, custom builds secondary
sozercan Mar 10, 2026
bd26cc7
docs: add :latest tag to pre-built runner image references
sozercan Mar 10, 2026
eaad407
fix: push frontend to registry for release runner builds
sozercan Mar 10, 2026
a8e31be
fix: address review feedback on runner script
sozercan Mar 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions .github/workflows/test-docker-runner-gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
name: docker-test-runner-gpu

on:
  workflow_dispatch:
    inputs:
      backend:
        description: 'Runner backend to test (leave empty to test all)'
        required: false
        type: choice
        default: 'all'
        options:
          - all
          - llama-cpp-cuda
          - diffusers-cuda
          - vllm-cuda

permissions: read-all

jobs:
  test:
    runs-on: [self-hosted, gpu]
    timeout-minutes: 240
    strategy:
      fail-fast: false
      # one backend at a time: all jobs share the same self-hosted GPU host
      max-parallel: 1
      matrix:
        backend: ${{ inputs.backend == 'all' && fromJson('["llama-cpp-cuda", "diffusers-cuda", "vllm-cuda"]') || fromJson(format('["{0}"]', inputs.backend)) }}
    steps:
      - name: cleanup workspace
        run: |
          rm -rf ./* || true
          rm -rf ./.??* || true
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # use default docker driver builder with containerd image store for local aikit image
      # these must be setup before running this test
      - run: docker buildx use default

      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain

      - name: build runner image
        run: |
          docker buildx build . -t runner-test:test \
            -f runners/${{ matrix.backend }}.yaml \
            --load --provenance=false --progress plain

      - name: list images
        run: docker images

      - name: run runner (llama-cpp-cuda)
        if: matrix.backend == 'llama-cpp-cuda'
        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test unsloth/gemma-3-1b-it-GGUF

      - name: run runner (diffusers-cuda)
        if: matrix.backend == 'diffusers-cuda'
        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test stabilityai/stable-diffusion-2-1

      - name: run runner (vllm-cuda)
        if: matrix.backend == 'vllm-cuda'
        run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test Qwen/Qwen2.5-0.5B-Instruct

      - name: run test (llama-cpp-cuda)
        if: matrix.backend == 'llama-cpp-cuda'
        run: |
          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "gemma-3-1b-it-GGUF",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"

          # jq -e exits non-zero when the expression is false/null, so an API
          # error object (where .choices is null) fails the step instead of
          # false-passing the previous `[ -z "$choices" ]` check.
          echo "$result" | jq -e '(.error == null) and (.choices | type == "array" and length > 0)' > /dev/null
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The JSON assertion here can false-pass: jq '.choices' yields null (non-empty string) if the API returns an error object, so the [ -z "$choices" ] check won’t catch failures. Consider jq -e '.choices and (.choices|type=="array") and (.choices|length>0)' (and fail on .error) to make the GPU runner test meaningful.

This issue also appears on line 89 of the same file.

Copilot uses AI. Check for mistakes.

- name: run test (diffusers-cuda)
if: matrix.backend == 'diffusers-cuda'
run: |
result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
"model": "stable-diffusion-2-1",
"prompt": "A cute baby llama",
"size": "256x256"
}')
echo $result

url=$(echo "$result" | jq '.data[0].url')
if [ -z "$url" ]; then
exit 1
fi

- name: save generated image
if: matrix.backend == 'diffusers-cuda'
run: docker cp runner-test:/tmp/generated/content/images /tmp || true

- name: run test (vllm-cuda)
if: matrix.backend == 'vllm-cuda'
run: |
result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "Qwen2.5-0.5B-Instruct",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}')
echo $result

choices=$(echo "$result" | jq '.choices')
if [ -z "$choices" ]; then
exit 1
fi

- name: save logs
if: always()
run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1

- run: docker stop runner-test
Comment on lines +132 to +134
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The workflow runs containers with --rm, but later always tries to collect logs and stop the container without guarding for the container having already exited (and been auto-removed). If the runner fails quickly, docker logs runner-test / docker stop runner-test can fail and mask the original failure. Consider removing --rm or adding || true (and/or checking existence) around the log/stop steps.

Suggested change
run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1
- run: docker stop runner-test
run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1 || true
- run: docker stop runner-test || true

Copilot uses AI. Check for mistakes.
if: always()

- run: docker system prune -a -f --volumes || true
if: always()

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: test-runner-${{ matrix.backend }}
path: |
/tmp/*.log
/tmp/images/*.png
114 changes: 114 additions & 0 deletions .github/workflows/test-docker-runner.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
name: docker-test-runner

on:
  push:
    paths-ignore:
      - '**.md'
      - 'website/**'
  pull_request:
    paths-ignore:
      - '**.md'
      - 'website/**'
  workflow_dispatch:

permissions: read-all

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ubuntu-latest-16-cores
    timeout-minutes: 240
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
        with:
          egress-policy: audit
          allowed-endpoints: >
            auth.docker.io:443
            huggingface.co:443
            *.huggingface.co:443
            *.hf.co:443
            cdn.dl.k8s.io:443
            dl.k8s.io:443
            download.docker.com:443
            gcr.io:443
            github.com:443
            *.githubusercontent.com:443
            production.cloudflare.docker.com:443
            proxy.golang.org:443
            registry-1.docker.io:443
            storage.googleapis.com:443
            *.blob.core.windows.net:443
            *.azureedge.net:443
            developer.download.nvidia.com:443
            dl-cdn.alpinelinux.org:443
            *.ubuntu.com:80
            ghcr.io:443
            sum.golang.org:443
            quay.io:443
            pypi.org:443
            files.pythonhosted.org:443

      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # need containerd image store for testing local images
      - uses: crazy-max/ghaction-setup-docker@1a6edb0ba9ac496f6850236981f15d8f9a82254d # v5.0.0
        with:
          version: version=v27.5.1
          daemon-config: |
            {
              "debug": true,
              "features": {
                "containerd-snapshotter": true
              }
            }
      - uses: crazy-max/ghaction-github-runtime@04d248b84655b509d8c44dc1d6f990c879747487 # v4.0.0

      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain

      - name: build runner image
        run: |
          docker buildx build . -t runner-test:test \
            -f runners/llama-cpp-cpu.yaml \
            --load --provenance=false --progress plain

      - name: list images
        run: docker images

      - name: run runner with model
        run: docker run --name runner-test -d -p 8080:8080 runner-test:test unsloth/gemma-3-1b-it-GGUF

      - name: run chat completion test
        run: |
          result=$(curl --fail --retry 30 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "gemma-3-1b-it-GGUF",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"

          # jq -e exits non-zero when the expression is false/null, so an API
          # error response (where .choices is null) fails CI instead of
          # false-passing the previous `[ -z "$choices" ]` check.
          echo "$result" | jq -e '(.error == null) and (.choices | type == "array" and length > 0)' > /dev/null

Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This workflow’s validation can false-pass: jq '.choices' returns the string null on error responses, which is non-empty so [ -z "$choices" ] won’t fail. Use jq -e to assert a non-null, non-empty array (and/or explicitly fail if .error is present) so CI reliably detects runner failures.

Suggested change
echo $result
choices=$(echo "$result" | jq '.choices')
if [ -z "$choices" ]; then
exit 1
fi
echo "$result"
echo "$result" | jq -e '
if (.error? != null) then
error("error field present in response")
elif (.choices | type != "array" or (.choices | length) == 0) then
error("choices must be a non-empty array")
else
.
end
' > /dev/null

Copilot uses AI. Check for mistakes.
- name: save logs
if: always()
run: docker logs runner-test > /tmp/docker-runner-llama-cpp-cpu.log 2>&1

- name: stop container
if: always()
run: docker stop runner-test || true

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: test-runner-llama-cpp-cpu
path: |
/tmp/*.log
28 changes: 24 additions & 4 deletions pkg/aikit2llb/inference/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,17 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
base := getBaseImage(c, platform)

var err error
state, merge, err = copyModels(c, base, state, *platform)
if err != nil {
return state, nil, err
if isRunnerMode(c) {
// Runner mode: skip model downloads, write config if present, install runner deps
state, merge = writeConfig(c, base, state, *platform)
state, merge = installRunnerDependencies(c, state, merge, *platform)
state, merge = installRunnerEntrypoint(c, state, merge)
} else {
// Standard mode: download models + write config
state, merge, err = copyModels(c, base, state, *platform)
if err != nil {
return state, nil, err
}
}

state, merge, err = addLocalAI(state, merge, *platform)
Expand Down Expand Up @@ -63,7 +71,19 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
return llb.Image(distrolessBase, llb.Platform(*platform))
}

// copyModels copies models to the image.
// writeConfig writes the /config.yaml file to the image when c.Config is set.
func writeConfig(c *config.InferenceConfig, base llb.State, s llb.State, platform specs.Platform) (llb.State, llb.State) {
savedState := s
if c.Config != "" {
s = s.Run(utils.Shf("mkdir -p /configuration && echo -n \"%s\" > /config.yaml", c.Config),
llb.WithCustomName(fmt.Sprintf("Creating config for platform %s/%s", platform.OS, platform.Architecture))).Root()
}
diff := llb.Diff(savedState, s)
merge := llb.Merge([]llb.State{base, diff})
return s, merge
Comment on lines +82 to +93
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

writeConfig builds a shell command with echo -n "%s" using the raw YAML string. This will break for configs containing quotes/newlines and also allows shell injection during the image build. Prefer writing the file via llb.Mkfile (or a here-doc that avoids interpolation) so the config content is treated as data, not shell syntax.

Copilot uses AI. Check for mistakes.
}

// copyModels copies models to the image and writes the config.
func copyModels(c *config.InferenceConfig, base llb.State, s llb.State, platform specs.Platform) (llb.State, llb.State, error) {
savedState := s
for _, model := range c.Models {
Expand Down
37 changes: 29 additions & 8 deletions pkg/aikit2llb/inference/image.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package inference

import (
"strings"

"github.com/kaito-project/aikit/pkg/aikit/config"
"github.com/kaito-project/aikit/pkg/utils"
"github.com/moby/buildkit/util/system"
Expand All @@ -9,16 +11,35 @@ import (

func NewImageConfig(c *config.InferenceConfig, platform *specs.Platform) *specs.Image {
img := emptyImage(c, platform)
cmd := []string{}
if c.Debug {
cmd = append(cmd, "--debug")
}
if c.Config != "" {
cmd = append(cmd, "--config-file=/config.yaml")

if isRunnerMode(c) {
// Runner mode: use the aikit-runner entrypoint script
img.Config.Entrypoint = []string{"/usr/local/bin/aikit-runner"}
img.Config.Cmd = []string{}

// Add runner labels
backendLabel := strings.Join(c.Backends, ",")
img.Config.Labels = map[string]string{
"ai.kaito.aikit.runner": "true",
"ai.kaito.aikit.backend": backendLabel,
}
if c.Runtime != "" {
img.Config.Labels["ai.kaito.aikit.runtime"] = c.Runtime
}
} else {
// Standard mode: use local-ai directly
cmd := []string{}
if c.Debug {
cmd = append(cmd, "--debug")
}
if c.Config != "" {
cmd = append(cmd, "--config-file=/config.yaml")
}

img.Config.Entrypoint = []string{"local-ai"}
img.Config.Cmd = cmd
}

img.Config.Entrypoint = []string{"local-ai"}
img.Config.Cmd = cmd
return img
}

Expand Down
Loading
Loading