Skip to content

Commit bd7072d

Browse files
authored
Add runtime images without models (#747)
1 parent 8a9e12b commit bd7072d

File tree

18 files changed

+1407
-14
lines changed

18 files changed

+1407
-14
lines changed

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ jobs:
4343
- name: lint
4444
uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
4545
with:
46-
version: v2.1.6
46+
version: v2.11.2
.github/workflows/release-runners.yaml

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
# Release workflow for the runner images.
#
# For every entry in the matrix, the job builds the aikit BuildKit frontend
# from this commit, uses it (via BUILDKIT_SYNTAX) to build the runner image
# described by runners/<runner>.yaml, pushes the result to GHCR, and finally
# signs and verifies the pushed digest with cosign keyless signing.
name: release-runners

on:
  push:
    tags:
      - 'v*'
  workflow_dispatch:

# Least privilege by default; the job below widens only what it needs.
permissions:
  contents: read

jobs:
  release-runners:
    permissions:
      contents: read
      # push images to ghcr.io
      packages: write
      # request the GitHub OIDC token used by cosign keyless signing
      id-token: write
    runs-on: ubuntu-latest-16-cores
    timeout-minutes: 360
    strategy:
      # one failing runner image must not cancel the others
      fail-fast: false
      matrix:
        include:
          - runner: llama-cpp-cpu
            file: runners/llama-cpp-cpu.yaml
            platforms: linux/amd64,linux/arm64
          - runner: llama-cpp-cuda
            file: runners/llama-cpp-cuda.yaml
            platforms: linux/amd64
          - runner: diffusers-cuda
            file: runners/diffusers-cuda.yaml
            platforms: linux/amd64
          - runner: vllm-cuda
            file: runners/vllm-cuda.yaml
            platforms: linux/amd64
    steps:
      - name: Harden Runner
        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
        with:
          # NOTE(review): per the harden-runner documentation the allow-list
          # below is only enforced with `egress-policy: block`; in `audit`
          # mode outbound traffic is recorded but not restricted. Confirm
          # that audit-only is intended for a release job.
          egress-policy: audit
          allowed-endpoints: >
            auth.docker.io:443
            fulcio.sigstore.dev:443
            ghcr.io:443
            github.com:443
            *.githubusercontent.com:443
            production.cloudflare.docker.com:443
            proxy.golang.org:443
            registry-1.docker.io:443
            rekor.sigstore.dev:443
            storage.googleapis.com:443
            tuf-repo-cdn.sigstore.dev:443
            *.blob.core.windows.net:443
            *.azureedge.net:443
            developer.download.nvidia.com:443
            dl-cdn.alpinelinux.org:443
            *.ubuntu.com:80
            sum.golang.org:443
            quay.io:443
            pypi.org:443
            files.pythonhosted.org:443

      # full history and all tags are fetched
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-tags: true
          fetch-depth: 0

      - name: Install Cosign
        uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
      - uses: crazy-max/ghaction-github-runtime@04d248b84655b509d8c44dc1d6f990c879747487 # v4.0.0

      - name: Login to GHCR
        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # NOTE(review): `type=raw,value=latest` has no condition, so a manual
      # workflow_dispatch run also appears to move the `latest` tag - confirm
      # that this is intended.
      - id: docker_meta
        uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
        with:
          images: ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}
          tags: |
            type=semver,pattern={{raw}}
            type=raw,value=latest

      # The frontend is tagged with the commit SHA so that the runner build
      # below uses exactly the frontend built from this checkout.
      - name: Build and push aikit frontend
        run: |
          docker buildx build . \
            --push --provenance=false --progress plain \
            -t ghcr.io/kaito-project/aikit/aikit:${{ github.sha }}

      - name: Build and push
        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
        id: build-and-push
        with:
          push: true
          tags: ${{ steps.docker_meta.outputs.tags }}
          file: ${{ matrix.file }}
          build-args: BUILDKIT_SYNTAX=ghcr.io/kaito-project/aikit/aikit:${{ github.sha }}
          # the cache is scoped per runner so matrix entries do not share a cache scope
          cache-from: type=gha,scope=runner-${{ matrix.runner }}
          cache-to: type=gha,scope=runner-${{ matrix.runner }},mode=max
          sbom: true
          provenance: true
          platforms: ${{ matrix.platforms }}

      # Sign by digest, not by tag, so the signature is bound to the exact
      # image produced by the step above. Workflow expressions are passed in
      # through `env` instead of being expanded inside the script text.
      - name: Sign the images with GitHub OIDC Token
        env:
          IMAGE: ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        run: cosign sign --yes "${IMAGE}@${DIGEST}"

      # The expected certificate identity is this workflow file at the ref
      # that triggered the run.
      - name: Verify image signature
        env:
          IMAGE: ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}
          DIGEST: ${{ steps.build-and-push.outputs.digest }}
        run: |
          cosign verify "${IMAGE}@${DIGEST}" \
            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
            --certificate-identity "https://github.com/kaito-project/aikit/.github/workflows/release-runners.yaml@${GITHUB_REF}"
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
# Manually triggered GPU test for the CUDA runner images.
#
# For each selected backend the job builds the aikit frontend and the runner
# image locally, starts the runner container with a model reference, sends one
# request to its HTTP API on port 8080 and validates the JSON response with jq.
name: docker-test-runner-gpu

on:
  workflow_dispatch:
    inputs:
      backend:
        description: "Runner backend to test (select 'all' to test every backend)"
        required: false
        type: choice
        default: 'all'
        options:
          - all
          - llama-cpp-cuda
          - diffusers-cuda
          - vllm-cuda

permissions: read-all

jobs:
  test:
    runs-on: [self-hosted, gpu]
    timeout-minutes: 240
    strategy:
      fail-fast: false
      # every backend uses the same container name and host port 8080
      max-parallel: 1
      matrix:
        # 'all' expands to every backend, anything else to a one-element list
        backend: ${{ inputs.backend == 'all' && fromJson('["llama-cpp-cuda", "diffusers-cuda", "vllm-cuda"]') || fromJson(format('["{0}"]', inputs.backend)) }}
    steps:
      - name: cleanup workspace
        run: |
          rm -rf ./* || true
          rm -rf ./.??* || true
          # remove a container left behind by an earlier, interrupted run
          docker rm -f runner-test > /dev/null 2>&1 || true
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      # use default docker driver builder with containerd image store for local aikit image
      # these must be setup before running this test
      - run: docker buildx use default

      - name: build aikit
        run: |
          docker buildx build . -t aikit:test \
            --load --provenance=false --progress plain

      # The backend name is passed through `env` instead of being expanded
      # inside the script text.
      - name: build runner image
        env:
          BACKEND: ${{ matrix.backend }}
        run: |
          docker buildx build . -t runner-test:test \
            -f "runners/${BACKEND}.yaml" \
            --build-arg BUILDKIT_SYNTAX=aikit:test \
            --load --provenance=false --progress plain

      - name: list images
        run: docker images

      # The containers are started without --rm on purpose: with --rm a runner
      # that crashes is removed immediately and the "save logs" step below has
      # nothing left to read. The container is removed explicitly at the end.
      - name: run runner (llama-cpp-cuda)
        if: matrix.backend == 'llama-cpp-cuda'
        run: docker run --name runner-test -d -p 8080:8080 --gpus all runner-test:test https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q2_K.gguf

      - name: run runner (diffusers-cuda)
        if: matrix.backend == 'diffusers-cuda'
        run: docker run --name runner-test -d -p 8080:8080 --gpus all runner-test:test stabilityai/stable-diffusion-2-1

      - name: run runner (vllm-cuda)
        if: matrix.backend == 'vllm-cuda'
        run: docker run --name runner-test -d -p 8080:8080 --gpus all runner-test:test Qwen/Qwen2.5-0.5B-Instruct

      # curl retries (bounded by --retry-max-time) until the server answers.
      # In the jq filter `(.choices | type)` is parenthesised deliberately:
      # `|` has the lowest precedence in jq, so without the parentheses the
      # second `.choices` would be applied to the array itself and jq would
      # abort on every valid response.
      - name: run test (llama-cpp-cuda)
        if: matrix.backend == 'llama-cpp-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "gemma-3-1b-it-Q2_K",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"

          echo "$result" | jq -e '
            if (.error? != null) then
              error("error field present in response")
            elif ((.choices | type) != "array" or (.choices | length) == 0) then
              error("choices must be a non-empty array")
            else
              .
            end
          ' > /dev/null

      # Same precedence rule as above applies to the `.data` check.
      - name: run test (diffusers-cuda)
        if: matrix.backend == 'diffusers-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
            "model": "stable-diffusion-2-1",
            "prompt": "A cute baby llama",
            "size": "256x256"
          }')
          echo "$result"

          echo "$result" | jq -e '
            if (.error? != null) then
              error("error field present in response")
            elif ((.data | type) != "array" or (.data | length) == 0) then
              error("data must be a non-empty array")
            elif (.data[0].url == null or .data[0].url == "") then
              error("data[0].url must be non-empty")
            else
              .
            end
          ' > /dev/null

      # best-effort copy of the generated images so they can be uploaded below
      - name: save generated image
        if: matrix.backend == 'diffusers-cuda'
        run: docker cp runner-test:/tmp/generated/content/images /tmp || true

      - name: run test (vllm-cuda)
        if: matrix.backend == 'vllm-cuda'
        run: |
          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
            "model": "Qwen2.5-0.5B-Instruct",
            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
          }')
          echo "$result"

          echo "$result" | jq -e '
            if (.error? != null) then
              error("error field present in response")
            elif ((.choices | type) != "array" or (.choices | length) == 0) then
              error("choices must be a non-empty array")
            else
              .
            end
          ' > /dev/null

      # The remaining steps run even after a failure. They are best-effort
      # because the container does not exist when an earlier build step failed.
      - name: save logs
        if: always()
        env:
          BACKEND: ${{ matrix.backend }}
        run: docker logs runner-test > "/tmp/docker-runner-${BACKEND}.log" 2>&1 || true

      # force-remove stops the container if it is still running
      - name: remove runner container
        if: always()
        run: docker rm -f runner-test || true

      - run: docker system prune -a -f --volumes || true
        if: always()

      - name: publish test artifacts
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
        with:
          name: test-runner-${{ matrix.backend }}
          path: |
            /tmp/*.log
            /tmp/images/*.png

0 commit comments

Comments
 (0)