|
5 | 5 | workflow_dispatch: |
6 | 6 | inputs: |
# pushLatest: optional boolean workflow_dispatch input (default false).
# The tag-generation script compares it against the string "true" and, when it
# matches, also emits the corresponding latest-* image tag for each variant.
7 | 7 | pushLatest: |
8 | | - description: 'Tag images produced by this job as latest' |
| 8 | + description: "Tag images produced by this job as latest" |
9 | 9 | required: false |
10 | 10 | type: boolean |
11 | 11 | default: false |
# releaseTag: optional string input (default "test").
# Used as the tag prefix of the pushed images, e.g.
# docker/model-runner:<releaseTag>-sglang-cuda and docker/model-runner:<releaseTag>-rocm.
12 | 12 | releaseTag: |
13 | | - description: 'Release tag' |
| 13 | + description: "Release tag" |
14 | 14 | required: false |
15 | 15 | type: string |
16 | 16 | default: "test" |
# llamaServerVersion: optional string input (default "latest").
# Forwarded to image builds as the LLAMA_SERVER_VERSION build arg
# (visible in the SGLang CUDA build step).
17 | 17 | llamaServerVersion: |
18 | | - description: 'llama-server version' |
| 18 | + description: "llama-server version" |
19 | 19 | required: false |
20 | 20 | type: string |
21 | 21 | default: "latest" |
# vllmVersion: optional string input (default "0.12.0"; kept quoted so it stays a string).
# NOTE(review): the consumer is outside the visible hunks — presumably the vLLM
# image build's build args; confirm.
22 | 22 | vllmVersion: |
23 | | - description: 'vLLM version' |
| 23 | + description: "vLLM version" |
24 | 24 | required: false |
25 | 25 | type: string |
26 | 26 | default: "0.12.0" |
# sglangVersion: new optional string input (default "0.4.0"; kept quoted so it stays a string).
# Forwarded to the "Build SGLang CUDA image" step as the SGLANG_VERSION build arg.
| 27 | + sglangVersion: |
| 28 | + description: "SGLang version" |
| 29 | + required: false |
| 30 | + type: string |
| 31 | + default: "0.4.0" |
# buildMusaCann: optional boolean input (default false) described as building MUSA and CANN images.
# NOTE(review): the steps it gates are outside the visible hunks — confirm where it is read.
27 | 32 | # This can be removed once we have llama.cpp built for MUSA and CANN. |
28 | 33 | buildMusaCann: |
29 | | - description: 'Build MUSA and CANN images' |
| 34 | + description: "Build MUSA and CANN images" |
30 | 35 | required: false |
31 | 36 | type: boolean |
32 | 37 | default: false |
|
76 | 81 | echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT" |
77 | 82 | fi |
78 | 83 | echo 'EOF' >> "$GITHUB_OUTPUT" |
# Emit the multi-line step output "sglang-cuda" using the name<<EOF ... EOF delimiter form:
# always the release-tagged image, plus latest-sglang-cuda when pushLatest is "true".
# The SGLang build step reads this output as its tags list.
# NOTE(review): ${{ inputs.releaseTag }} is substituted into the script text before the
# shell runs; consider passing it through a step-level env variable instead — confirm
# against the step header, which is not visible here.
| 84 | + echo "sglang-cuda<<EOF" >> "$GITHUB_OUTPUT" |
| 85 | + echo "docker/model-runner:${{ inputs.releaseTag }}-sglang-cuda" >> "$GITHUB_OUTPUT" |
| 86 | + if [ "${{ inputs.pushLatest }}" == "true" ]; then |
| 87 | + echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT" |
| 88 | + fi |
| 89 | + echo 'EOF' >> "$GITHUB_OUTPUT" |
79 | 90 | echo "rocm<<EOF" >> "$GITHUB_OUTPUT" |
80 | 91 | echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT" |
81 | 92 | if [ "${{ inputs.pushLatest }}" == "true" ]; then |
@@ -155,6 +166,22 @@ jobs: |
155 | 166 | provenance: mode=max |
156 | 167 | tags: ${{ steps.tags.outputs.vllm-cuda }} |
157 | 168 |
|
# Build and push the SGLang CUDA image: the final-sglang target of Dockerfile, for
# linux/amd64 and linux/arm64, on the nvidia/cuda 12.9.0 runtime (Ubuntu 24.04) base image.
# Versions come from the llamaServerVersion and sglangVersion inputs; tags come from the
# "sglang-cuda" output of the step with id "tags". SBOM and mode=max provenance are enabled.
# NOTE(review): confirm SGLang at the requested version can be installed on linux/arm64;
# otherwise the multi-platform build will fail for that platform.
| 169 | + - name: Build SGLang CUDA image |
| 170 | + uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 |
| 171 | + with: |
| 172 | + file: Dockerfile |
| 173 | + target: final-sglang |
| 174 | + platforms: linux/amd64, linux/arm64 |
| 175 | + build-args: | |
| 176 | + "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}" |
| 177 | + "LLAMA_SERVER_VARIANT=cuda" |
| 178 | + "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04" |
| 179 | + "SGLANG_VERSION=${{ inputs.sglangVersion }}" |
| 180 | + push: true |
| 181 | + sbom: true |
| 182 | + provenance: mode=max |
| 183 | + tags: ${{ steps.tags.outputs.sglang-cuda }} |
| 184 | + |
158 | 185 | - name: Build ROCm image |
159 | 186 | uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 |
160 | 187 | with: |
|
0 commit comments