kaito-project
diff --git a/‎.github/workflows/lint.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/lint.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release-runners.yaml‎
Lines changed: 121 additions & 0 deletions b/‎.github/workflows/release-runners.yaml‎
Lines changed: 121 additions & 0 deletions
diff --git a/‎.github/workflows/test-docker-runner-gpu.yaml‎
Lines changed: 147 additions & 0 deletions b/‎.github/workflows/test-docker-runner-gpu.yaml‎
Lines changed: 147 additions & 0 deletions
@@ -43,4 +43,4 @@ jobs:
       - name: lint
         uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0
         with:
-          version: v2.1.6
+          version: v2.11.2
@@ -0,0 +1,121 @@
+# Release pipeline for the runner images: build, push, sign and verify.
+name: release-runners
+
+on:
+  # Any pushed tag whose name begins with "v" starts a release.
+  push:
+    tags:
+      - 'v*'
+  # Manual runs are allowed as well.
+  workflow_dispatch:
+
+# Workflow-level default; the job declares the extra scopes it needs.
+permissions:
+  contents: read
+
+jobs:
+  # One matrix job per runner definition file under runners/.
+  release-runners:
+    runs-on: ubuntu-latest-16-cores
+    timeout-minutes: 360
+    permissions:
+      # Source checkout.
+      contents: read
+      # Pushing images to ghcr.io with GITHUB_TOKEN.
+      packages: write
+      # OIDC token used by the cosign signing step.
+      id-token: write
+    strategy:
+      # Keep building the remaining runners when one of them fails.
+      fail-fast: false
+      matrix:
+        include:
+          # Only the CPU runner is built for more than one platform.
+          - runner: llama-cpp-cpu
+            platforms: linux/amd64,linux/arm64
+            file: runners/llama-cpp-cpu.yaml
+          - runner: llama-cpp-cuda
+            platforms: linux/amd64
+            file: runners/llama-cpp-cuda.yaml
+          - runner: diffusers-cuda
+            platforms: linux/amd64
+            file: runners/diffusers-cuda.yaml
+          - runner: vllm-cuda
+            platforms: linux/amd64
+            file: runners/vllm-cuda.yaml
+    steps:
+      # Network hardening/monitoring for the job, pinned to a commit SHA.
+      - name: Harden Runner
+        uses: step-security/harden-runner@58077d3c7e43986b6b15fba718e8ea69e387dfcc # v2.15.1
+        with:
+          # NOTE(review): with egress-policy set to audit, the allow-list below
+          # is presumably advisory only (not enforced) — confirm against the
+          # harden-runner documentation before relying on it.
+          egress-policy: audit
+          # Folded scalar: the entries below are joined into one
+          # space-separated string of host:port pairs.
+          allowed-endpoints: >
+            auth.docker.io:443
+            fulcio.sigstore.dev:443
+            ghcr.io:443
+            github.com:443
+            *.githubusercontent.com:443
+            production.cloudflare.docker.com:443
+            proxy.golang.org:443
+            registry-1.docker.io:443
+            rekor.sigstore.dev:443
+            storage.googleapis.com:443
+            tuf-repo-cdn.sigstore.dev:443
+            *.blob.core.windows.net:443
+            *.azureedge.net:443
+            developer.download.nvidia.com:443
+            dl-cdn.alpinelinux.org:443
+            *.ubuntu.com:80
+            sum.golang.org:443
+            quay.io:443
+            pypi.org:443
+            files.pythonhosted.org:443
+
+      # Check out with complete history (fetch-depth: 0) and all tags.
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
+          fetch-tags: true
+
+      # cosign is used by the sign and verify steps at the end of the job.
+      - name: Install Cosign
+        uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
+
+      # Buildx builder used by both image builds below.
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0
+      # NOTE(review): presumably exposes the Actions runtime variables to later
+      # run steps — confirm it is still required.
+      - uses: crazy-max/ghaction-github-runtime@04d248b84655b509d8c44dc1d6f990c879747487 # v4.0.0
+
+      # Authenticate against ghcr.io with the job's GITHUB_TOKEN.
+      - name: Login to GHCR
+        uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
+        with:
+          registry: ghcr.io
+          password: ${{ secrets.GITHUB_TOKEN }}
+          username: ${{ github.actor }}
+
+      # Produces the tag list consumed by the "Build and push" step:
+      # the raw semver tag (when available) plus "latest".
+      - uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0
+        id: docker_meta
+        with:
+          tags: |
+            type=semver,pattern={{raw}}
+            type=raw,value=latest
+          images: ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}
+
+      # Publish the frontend image tagged with this commit's SHA; the next
+      # step passes that same reference as the BUILDKIT_SYNTAX build arg.
+      - name: Build and push aikit frontend
+        run: |
+          docker buildx build . \
+            --push --provenance=false --progress plain \
+            -t ghcr.io/kaito-project/aikit/aikit:${{ github.sha }}
+
+      # Build the runner image from the matrix entry's definition file and
+      # push it; the resulting digest is read by the sign/verify steps.
+      - name: Build and push
+        id: build-and-push
+        uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0
+        with:
+          file: ${{ matrix.file }}
+          platforms: ${{ matrix.platforms }}
+          build-args: BUILDKIT_SYNTAX=ghcr.io/kaito-project/aikit/aikit:${{ github.sha }}
+          tags: ${{ steps.docker_meta.outputs.tags }}
+          push: true
+          sbom: true
+          provenance: true
+          # GitHub Actions cache, scoped per runner.
+          cache-from: type=gha,scope=runner-${{ matrix.runner }}
+          cache-to: type=gha,scope=runner-${{ matrix.runner }},mode=max
+
+      # Sign the pushed image, addressed by the digest from "Build and push".
+      - name: Sign the images with GitHub OIDC Token
+        run: >-
+          cosign sign --yes
+          "ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}@${DIGEST}"
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
+
+      # Check the signature against the GitHub OIDC issuer and this workflow
+      # file at the ref that triggered the run.
+      - name: Verify image signature
+        run: |
+          cosign verify "ghcr.io/kaito-project/aikit/runners/${{ matrix.runner }}@${DIGEST}" \
+            --certificate-oidc-issuer https://token.actions.githubusercontent.com \
+            --certificate-identity https://github.com/kaito-project/aikit/.github/workflows/release-runners.yaml@$GITHUB_REF
+        env:
+          DIGEST: ${{ steps.build-and-push.outputs.digest }}
@@ -0,0 +1,147 @@
+# Manually triggered GPU smoke test for the CUDA runner images.
+name: docker-test-runner-gpu
+
+on:
+  workflow_dispatch:
+    inputs:
+      backend:
+        # This is a choice input, so it always carries one of the options
+        # below and cannot be left empty; "all" (the default) runs every backend.
+        description: "Runner backend to test (select 'all' to test every backend)"
+        required: false
+        type: choice
+        default: 'all'
+        options:
+          - all
+          - llama-cpp-cuda
+          - diffusers-cuda
+          - vllm-cuda
+
+permissions: read-all
+
+jobs:
+  test:
+    timeout-minutes: 240
+    # Requires a self-hosted runner carrying the "gpu" label.
+    runs-on:
+      - self-hosted
+      - gpu
+    strategy:
+      # Backends run one at a time; every matrix job uses the same container
+      # name and host port.
+      max-parallel: 1
+      fail-fast: false
+      matrix:
+        # "all" expands to the full backend list; any other choice becomes a
+        # single-element list holding just that backend.
+        backend: ${{ inputs.backend == 'all' && fromJson('["llama-cpp-cuda", "diffusers-cuda", "vllm-cuda"]') || fromJson(format('["{0}"]', inputs.backend)) }}
+    steps:
+      # Remove everything in the workspace, including dot-entries; failures
+      # are ignored.
+      - name: cleanup workspace
+        run: |
+          rm -rf ./* || true
+          rm -rf ./.??* || true
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      # Use the default docker-driver builder with the containerd image store
+      # so the locally built aikit image is available to the runner build.
+      # Both must already be set up on the runner before this test is run.
+      - run: docker buildx use default
+
+      # Build the aikit frontend image locally as aikit:test.
+      - name: build aikit
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --provenance=false --progress plain
+
+      # Build the selected runner image, using aikit:test as BUILDKIT_SYNTAX.
+      - name: build runner image
+        run: |
+          docker buildx build . -t runner-test:test \
+            -f runners/${{ matrix.backend }}.yaml \
+            --build-arg BUILDKIT_SYNTAX=aikit:test \
+            --load --provenance=false --progress plain
+
+      # Print the local image list to the job log.
+      - name: list images
+        run: docker images
+
+      # Exactly one of the three steps below runs per matrix job. Each starts
+      # the runner detached as "runner-test" on host port 8080 with all GPUs,
+      # passing the model reference as the container argument.
+      - name: run runner (llama-cpp-cuda)
+        if: matrix.backend == 'llama-cpp-cuda'
+        run: >-
+          docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test
+          https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q2_K.gguf
+
+      - name: run runner (diffusers-cuda)
+        if: matrix.backend == 'diffusers-cuda'
+        run: >-
+          docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test
+          stabilityai/stable-diffusion-2-1
+
+      - name: run runner (vllm-cuda)
+        if: matrix.backend == 'vllm-cuda'
+        run: >-
+          docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test
+          Qwen/Qwen2.5-0.5B-Instruct
+
+      # Smoke test for the llama-cpp-cuda runner: send one chat completion
+      # request (curl retries while the server is still starting) and require
+      # a response with no error field and a non-empty "choices" array.
+      - name: run test (llama-cpp-cuda)
+        if: matrix.backend == 'llama-cpp-cuda'
+        run: |
+          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "gemma-3-1b-it-Q2_K",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo "$result"
+
+          # Each lookup is parenthesised on its own. The jq pipe binds looser
+          # than "or", so writing ".choices | type != ... or (.choices | length)"
+          # would apply the second .choices to the array itself and make jq
+          # fail on every valid response.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.choices | type) != "array" or (.choices | length) == 0) then
+              error("choices must be a non-empty array")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Smoke test for the diffusers-cuda runner: request one image (curl
+      # retries while the server is still starting) and require a response
+      # with no error field, a non-empty "data" array and a non-empty
+      # data[0].url.
+      - name: run test (diffusers-cuda)
+        if: matrix.backend == 'diffusers-cuda'
+        run: |
+          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{
+            "model": "stable-diffusion-2-1",
+            "prompt": "A cute baby llama",
+            "size": "256x256"
+          }')
+          echo "$result"
+
+          # Each lookup is parenthesised on its own. The jq pipe binds looser
+          # than "or", so writing ".data | type != ... or (.data | length)"
+          # would apply the second .data to the array itself and make jq fail
+          # on every valid response.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.data | type) != "array" or (.data | length) == 0) then
+              error("data must be a non-empty array")
+            elif (.data[0].url == null or .data[0].url == "") then
+              error("data[0].url must be non-empty")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Best-effort copy of the generated images out of the container so the
+      # artifact upload can pick them up from /tmp/images.
+      - name: save generated image
+        if: matrix.backend == 'diffusers-cuda'
+        run: docker cp runner-test:/tmp/generated/content/images /tmp || true
+
+      # Smoke test for the vllm-cuda runner: send one chat completion request
+      # (curl retries while the server is still starting) and require a
+      # response with no error field and a non-empty "choices" array.
+      - name: run test (vllm-cuda)
+        if: matrix.backend == 'vllm-cuda'
+        run: |
+          result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "Qwen2.5-0.5B-Instruct",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo "$result"
+
+          # Each lookup is parenthesised on its own. The jq pipe binds looser
+          # than "or", so writing ".choices | type != ... or (.choices | length)"
+          # would apply the second .choices to the array itself and make jq
+          # fail on every valid response.
+          echo "$result" | jq -e '
+            if (.error? != null) then
+              error("error field present in response")
+            elif ((.choices | type) != "array" or (.choices | length) == 0) then
+              error("choices must be a non-empty array")
+            else
+              .
+            end
+          ' > /dev/null
+
+      # Teardown runs even after a failure. The container may not exist (for
+      # example when a build step failed before it was started), so log
+      # capture and stop are best-effort and do not add further failed steps
+      # on top of the one that actually broke the job.
+      - name: save logs
+        if: always()
+        run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1 || true
+
+      - run: docker stop runner-test || true
+        if: always()
+
+      # Reclaim disk space on the self-hosted runner: removes all unused
+      # images, containers, networks and volumes.
+      - run: docker system prune -a -f --volumes || true
+        if: always()
+
+      # Upload the container log and any images copied out to /tmp/images.
+      - name: publish test artifacts
+        if: always()
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: test-runner-${{ matrix.backend }}
+          path: |
+            /tmp/*.log
+            /tmp/images/*.png