|
| 1 | +name: docker-test-runner-gpu |
| 2 | + |
| 3 | +on: |
| 4 | + workflow_dispatch: |
| 5 | + inputs: |
| 6 | + backend: |
| 7 | + description: 'Runner backend to test (select "all" to test every backend)' |
| 8 | + required: false |
| 9 | + type: choice |
| 10 | + default: 'all' |
| 11 | + options: |
| 12 | + - all |
| 13 | + - llama-cpp-cuda |
| 14 | + - diffusers-cuda |
| 15 | + - vllm-cuda |
| 16 | + |
| 17 | +permissions: read-all |
| 18 | + |
| 19 | +jobs: |
| 20 | + test: |
| 21 | + runs-on: [self-hosted, gpu] |
| 22 | + timeout-minutes: 240 |
| 23 | + strategy: |
| 24 | + fail-fast: false |
| 25 | + max-parallel: 1 |
| 26 | + matrix: |
| 27 | + backend: ${{ inputs.backend == 'all' && fromJson('["llama-cpp-cuda", "diffusers-cuda", "vllm-cuda"]') || fromJson(format('["{0}"]', inputs.backend)) }} |
| 28 | + steps: |
| 29 | + - name: cleanup workspace |
| 30 | + run: | |
| 31 | + rm -rf ./* || true |
| 32 | + rm -rf ./.??* || true |
| 33 | + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 |
| 34 | + |
| 35 | + # use the default docker driver builder with the containerd image store for the local aikit image |
| 36 | + # these must be set up before running this test |
| 37 | + - run: docker buildx use default |
| 38 | + |
| 39 | + - name: build aikit |
| 40 | + run: | |
| 41 | + docker buildx build . -t aikit:test \ |
| 42 | + --load --provenance=false --progress plain |
| 43 | +
|
| 44 | + - name: build runner image |
| 45 | + run: | |
| 46 | + docker buildx build . -t runner-test:test \ |
| 47 | + -f runners/${{ matrix.backend }}.yaml \ |
| 48 | + --build-arg BUILDKIT_SYNTAX=aikit:test \ |
| 49 | + --load --provenance=false --progress plain |
| 50 | +
|
| 51 | + - name: list images |
| 52 | + run: docker images |
| 53 | + |
| 54 | + - name: run runner (llama-cpp-cuda) |
| 55 | + if: matrix.backend == 'llama-cpp-cuda' |
| 56 | + run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q2_K.gguf |
| 57 | + |
| 58 | + - name: run runner (diffusers-cuda) |
| 59 | + if: matrix.backend == 'diffusers-cuda' |
| 60 | + run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test stabilityai/stable-diffusion-2-1 |
| 61 | + |
| 62 | + - name: run runner (vllm-cuda) |
| 63 | + if: matrix.backend == 'vllm-cuda' |
| 64 | + run: docker run --name runner-test -d --rm -p 8080:8080 --gpus all runner-test:test Qwen/Qwen2.5-0.5B-Instruct |
| 65 | + |
| 66 | + - name: run test (llama-cpp-cuda) |
| 67 | + if: matrix.backend == 'llama-cpp-cuda' |
| 68 | + run: | |
| 69 | + result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ |
| 70 | + "model": "gemma-3-1b-it-Q2_K", |
| 71 | + "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] |
| 72 | + }') |
| 73 | + echo "$result" |
| 74 | +
|
| 75 | + echo "$result" | jq -e ' |
| 76 | + if (.error? != null) then |
| 77 | + error("error field present in response") |
| 78 | + elif (.choices | type != "array" or (.choices | length) == 0) then |
| 79 | + error("choices must be a non-empty array") |
| 80 | + else |
| 81 | + . |
| 82 | + end |
| 83 | + ' > /dev/null |
| 84 | +
|
| 85 | + - name: run test (diffusers-cuda) |
| 86 | + if: matrix.backend == 'diffusers-cuda' |
| 87 | + run: | |
| 88 | + result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/images/generations -H "Content-Type: application/json" -d '{ |
| 89 | + "model": "stable-diffusion-2-1", |
| 90 | + "prompt": "A cute baby llama", |
| 91 | + "size": "256x256" |
| 92 | + }') |
| 93 | + echo "$result" |
| 94 | +
|
| 95 | + echo "$result" | jq -e ' |
| 96 | + if (.error? != null) then |
| 97 | + error("error field present in response") |
| 98 | + elif (.data | type != "array" or (.data | length) == 0) then |
| 99 | + error("data must be a non-empty array") |
| 100 | + elif (.data[0].url == null or .data[0].url == "") then |
| 101 | + error("data[0].url must be non-empty") |
| 102 | + else |
| 103 | + . |
| 104 | + end |
| 105 | + ' > /dev/null |
| 106 | +
|
| 107 | + - name: save generated image |
| 108 | + if: matrix.backend == 'diffusers-cuda' |
| 109 | + run: docker cp runner-test:/tmp/generated/content/images /tmp || true |
| 110 | + |
| 111 | + - name: run test (vllm-cuda) |
| 112 | + if: matrix.backend == 'vllm-cuda' |
| 113 | + run: | |
| 114 | + result=$(curl --fail --retry 10 --retry-all-errors --retry-max-time 600 http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ |
| 115 | + "model": "Qwen2.5-0.5B-Instruct", |
| 116 | + "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}] |
| 117 | + }') |
| 118 | + echo "$result" |
| 119 | +
|
| 120 | + echo "$result" | jq -e ' |
| 121 | + if (.error? != null) then |
| 122 | + error("error field present in response") |
| 123 | + elif (.choices | type != "array" or (.choices | length) == 0) then |
| 124 | + error("choices must be a non-empty array") |
| 125 | + else |
| 126 | + . |
| 127 | + end |
| 128 | + ' > /dev/null |
| 129 | +
|
| 130 | + - name: save logs |
| 131 | + if: always() |
| 132 | + run: docker logs runner-test > /tmp/docker-runner-${{ matrix.backend }}.log 2>&1 |
| 133 | + |
| 134 | + - run: docker stop runner-test |
| 135 | + if: always() |
| 136 | + |
| 137 | + - run: docker system prune -a -f --volumes || true |
| 138 | + if: always() |
| 139 | + |
| 140 | + - name: publish test artifacts |
| 141 | + if: always() |
| 142 | + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 |
| 143 | + with: |
| 144 | + name: test-runner-${{ matrix.backend }} |
| 145 | + path: | |
| 146 | + /tmp/*.log |
| 147 | + /tmp/images/*.png |
0 commit comments