
Commit 1087b82

Merge pull request #221 from docker/vllm
Add vLLM
2 parents ff83a84 + 13e2b4f commit 1087b82


24 files changed: +688 −208 lines


.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
@@ -11,13 +11,13 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

       - name: Verify vendor/ is not present
         run: stat vendor && exit 1 || exit 0

       - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
         with:
           go-version: 1.24.2
           cache: true

.github/workflows/cli-build.yml

Lines changed: 3 additions & 3 deletions
@@ -25,8 +25,8 @@ jobs:
       id-token: write
       contents: read
     steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-go@v5
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
+      - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
         with:
           go-version-file: cmd/cli/go.mod
           cache: true
@@ -35,7 +35,7 @@ jobs:
         working-directory: cmd/cli
         run: |
           make release VERSION=${{ github.sha }}
-      - uses: actions/upload-artifact@v4
+      - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
         with:
           name: dist
           path: |

.github/workflows/cli-validate.yml

Lines changed: 3 additions & 3 deletions
@@ -31,11 +31,11 @@ jobs:
     steps:
       -
         name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
       -
         name: List targets
         id: generate
-        uses: docker/bake-action/subaction/list-targets@v6
+        uses: docker/bake-action/subaction/list-targets@3acf805d94d93a86cce4ca44798a76464a75b88c
         with:
           files: ./cmd/cli/docker-bake.hcl
           target: validate
@@ -51,7 +51,7 @@ jobs:
     steps:
       -
         name: Checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
       -
         name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

.github/workflows/dmr-daily-check.yml

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ jobs:

     steps:
       - name: Set up Docker
-        uses: docker/setup-docker-action@v4
+        uses: docker/setup-docker-action@3fb92d6d9c634363128c8cce4bc3b2826526370a

       - name: Install docker-model-plugin
         run: |

.github/workflows/release.yml

Lines changed: 36 additions & 7 deletions
@@ -19,16 +19,21 @@ on:
         required: false
         type: string
         default: "latest"
+      vllmVersion:
+        description: 'vLLM version'
+        required: false
+        type: string
+        default: "0.11.0"

 jobs:
   test:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

       - name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5
         with:
           go-version: 1.24.2
           cache: true
@@ -41,7 +46,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955

       - name: Format tags
         id: tags
@@ -59,25 +64,32 @@ jobs:
             echo "docker/model-runner:latest-cuda" >> "$GITHUB_OUTPUT"
           fi
           echo 'EOF' >> "$GITHUB_OUTPUT"
+          echo "vllm-cuda<<EOF" >> "$GITHUB_OUTPUT"
+          echo "docker/model-runner:${{ inputs.releaseTag }}-vllm-cuda" >> "$GITHUB_OUTPUT"
+          if [ "${{ inputs.pushLatest }}" == "true" ]; then
+            echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT"
+          fi
+          echo 'EOF' >> "$GITHUB_OUTPUT"

       - name: Log in to DockerHub
-        uses: docker/login-action@v3
+        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
         with:
           username: "docker"
           password: ${{ secrets.ORG_ACCESS_TOKEN }}

       - name: Set up Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435
         with:
           version: "lab:latest"
           driver: cloud
           endpoint: "docker/make-product-smarter"
           install: true

       - name: Build CPU image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
         with:
           file: Dockerfile
+          target: final-llamacpp
           platforms: linux/amd64, linux/arm64
           build-args: |
             "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
@@ -87,9 +99,10 @@ jobs:
           tags: ${{ steps.tags.outputs.cpu }}

       - name: Build CUDA image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
         with:
           file: Dockerfile
+          target: final-llamacpp
           platforms: linux/amd64, linux/arm64
           build-args: |
             "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
@@ -99,3 +112,19 @@ jobs:
           sbom: true
           provenance: mode=max
           tags: ${{ steps.tags.outputs.cuda }}
+
+      - name: Build vLLM CUDA image
+        uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25
+        with:
+          file: Dockerfile
+          target: final-vllm
+          platforms: linux/amd64
+          build-args: |
+            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
+            "LLAMA_SERVER_VARIANT=cuda"
+            "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
+            "VLLM_VERSION=${{ inputs.vllmVersion }}"
+          push: true
+          sbom: true
+          provenance: mode=max
+          tags: ${{ steps.tags.outputs.vllm-cuda }}

Dockerfile

Lines changed: 29 additions & 4 deletions
@@ -35,7 +35,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \
 FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server

 # --- Final image ---
-FROM docker.io/${BASE_IMAGE} AS final
+FROM docker.io/${BASE_IMAGE} AS llamacpp

 ARG LLAMA_SERVER_VARIANT

@@ -55,9 +55,6 @@ RUN mkdir -p /var/run/model-runner /app/bin /models && \
     chown -R modelrunner:modelrunner /var/run/model-runner /app /models && \
     chmod -R 755 /models

-# Copy the built binary from builder
-COPY --from=builder /app/model-runner /app/model-runner
-
 # Copy the llama.cpp binary from the llama-server stage
 ARG LLAMA_BINARY_PATH
 COPY --from=llama-server ${LLAMA_BINARY_PATH}/ /app/.
@@ -77,3 +74,31 @@ ENV LD_LIBRARY_PATH=/app/lib
 LABEL com.docker.desktop.service="model-runner"

 ENTRYPOINT ["/app/model-runner"]
+
+# --- vLLM variant ---
+FROM llamacpp AS vllm
+
+ARG VLLM_VERSION
+
+USER root
+
+RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*
+
+RUN mkdir -p /opt/vllm-env && chown -R modelrunner:modelrunner /opt/vllm-env
+
+USER modelrunner
+
+# Install uv and vLLM as modelrunner user
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
+    && ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}"
+
+RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version
+
+FROM llamacpp AS final-llamacpp
+# Copy the built binary from builder
+COPY --from=builder /app/model-runner /app/model-runner
+
+FROM vllm AS final-vllm
+# Copy the built binary from builder
+COPY --from=builder /app/model-runner /app/model-runner
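Note that the vllm stage records the installed version at /opt/vllm-env/version (the final RUN above). A minimal sketch of how a Go process inside the image could confirm that marker at startup — a hypothetical helper for illustration, not code from this commit:

package main

import (
	"fmt"
	"os"
	"strings"
)

// vllmVersion reads the marker file the Dockerfile's vllm stage writes
// at build time. Hypothetical helper; not part of this repository.
func vllmVersion() (string, error) {
	data, err := os.ReadFile("/opt/vllm-env/version")
	if err != nil {
		return "", fmt.Errorf("vLLM environment not present: %w", err)
	}
	return strings.TrimSpace(string(data)), nil
}

func main() {
	v, err := vllmVersion()
	if err != nil {
		fmt.Println("no vLLM:", err)
		return
	}
	fmt.Println("vLLM", v) // e.g. "vLLM 0.11.0" with the default build arg
}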

cmd/cli/commands/backend.go

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ import (
 var ValidBackends = map[string]bool{
 	"llama.cpp": true,
 	"openai":    true,
+	"vllm":      true,
 }

 // validateBackend checks if the provided backend is valid
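The diff shows only the ValidBackends map, but the trailing comment names a validateBackend helper. A minimal sketch of what such a validator might look like, assuming the map is its source of truth (the actual signature in cmd/cli/commands/backend.go may differ):

package commands

import "fmt"

// ValidBackends mirrors the map in the diff above.
var ValidBackends = map[string]bool{
	"llama.cpp": true,
	"openai":    true,
	"vllm":      true,
}

// validateBackend rejects backend names outside ValidBackends.
// Illustrative sketch; the repo's implementation may differ.
func validateBackend(backend string) error {
	if !ValidBackends[backend] {
		return fmt.Errorf("invalid backend %q (valid: llama.cpp, openai, vllm)", backend)
	}
	return nil
}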

cmd/cli/docs/reference/docker_model_list.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ plink: docker_model.yaml
 options:
   - option: backend
     value_type: string
-    description: Specify the backend to use (llama.cpp, openai)
+    description: Specify the backend to use (llama.cpp, openai, vllm)
     deprecated: false
     hidden: true
     experimental: false

cmd/cli/docs/reference/docker_model_run.yaml

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ plink: docker_model.yaml
 options:
   - option: backend
     value_type: string
-    description: Specify the backend to use (llama.cpp, openai)
+    description: Specify the backend to use (llama.cpp, openai, vllm)
     deprecated: false
     hidden: true
     experimental: false

main.go

Lines changed: 12 additions & 1 deletion
@@ -14,6 +14,7 @@ import (
 	"github.com/docker/model-runner/pkg/gpuinfo"
 	"github.com/docker/model-runner/pkg/inference"
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
+	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/memory"
 	"github.com/docker/model-runner/pkg/inference/models"
@@ -119,9 +120,19 @@ func main() {

 	memEstimator.SetDefaultBackend(llamaCppBackend)

+	vllmBackend, err := vllm.New(
+		log,
+		modelManager,
+		log.WithFields(logrus.Fields{"component": "vllm"}),
+		nil,
+	)
+	if err != nil {
+		log.Fatalf("unable to initialize %s backend: %v", vllm.Name, err)
+	}
+
 	scheduler := scheduling.NewScheduler(
 		log,
-		map[string]inference.Backend{llamacpp.Name: llamaCppBackend},
+		map[string]inference.Backend{llamacpp.Name: llamaCppBackend, vllm.Name: vllmBackend},
 		llamaCppBackend,
 		modelManager,
 		http.DefaultClient,
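scheduling.NewScheduler now receives a registry mapping backend names ("llama.cpp", "vllm") to inference.Backend values, with llamaCppBackend passed separately as the default. A hedged sketch of how a name-keyed registry like this could resolve a request — hypothetical types, not the repo's scheduling package:

package main

import "fmt"

// Backend stands in for the repo's inference.Backend interface
// (hypothetical; the real one lives in pkg/inference).
type Backend interface{ Name() string }

// resolveBackend picks a backend from a name-keyed registry, falling
// back to the default when no name is requested. Illustrative only;
// the actual selection logic lives in pkg/inference/scheduling.
func resolveBackend(registry map[string]Backend, requested string, def Backend) (Backend, error) {
	if requested == "" {
		return def, nil
	}
	if b, ok := registry[requested]; ok {
		return b, nil
	}
	return nil, fmt.Errorf("unknown backend %q", requested)
}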
