@@ -33,7 +33,13 @@ COPY --link . .
 # Build the Go binary (static build)
 RUN --mount=type=cache,target=/go/pkg/mod \
     --mount=type=cache,target=/root/.cache/go-build \
-    CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner ./main.go
+    CGO_ENABLED=1 GOOS=linux go build -ldflags="-s -w" -o model-runner .
+
+# Build the Go binary for SGLang (without vLLM)
+FROM builder AS builder-sglang
+RUN --mount=type=cache,target=/go/pkg/mod \
+    --mount=type=cache,target=/root/.cache/go-build \
+    CGO_ENABLED=1 GOOS=linux go build -tags=novllm -ldflags="-s -w" -o model-runner .
 
 # --- Get llama.cpp binary ---
 FROM docker/docker-model-backend-llamacpp:${LLAMA_SERVER_VERSION}-${LLAMA_SERVER_VARIANT} AS llama-server
@@ -97,21 +103,58 @@ USER modelrunner
 
 # Install uv and vLLM as modelrunner user
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
-    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
-    && if [ "$TARGETARCH" = "amd64" ]; then \
-    WHEEL_ARCH="manylinux_2_31_x86_64" ; \
-    WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl" ; \
-    ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL" ; \
+    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
+    && if [ "$TARGETARCH" = "amd64" ]; then \
+        WHEEL_ARCH="manylinux_2_31_x86_64" ; \
+        WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl" ; \
+        ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL" ; \
     else \
-    ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}" ; \
+        ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}" ; \
     fi
 
 RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version
 
+# --- SGLang variant ---
+FROM llamacpp AS sglang
+
+ARG SGLANG_VERSION=0.5.6
+
+USER root
+
+# Install CUDA toolkit 13 for nvcc (needed for flashinfer JIT compilation)
+RUN apt update && apt install -y \
+    python3 python3-venv python3-dev \
+    curl ca-certificates build-essential \
+    libnuma1 libnuma-dev numactl ninja-build \
+    wget gnupg \
+    && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
+    && dpkg -i cuda-keyring_1.1-1_all.deb \
+    && apt update && apt install -y cuda-toolkit-13-0 \
+    && rm cuda-keyring_1.1-1_all.deb \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN mkdir -p /opt/sglang-env && chown -R modelrunner:modelrunner /opt/sglang-env
+
+USER modelrunner
+
+# Set CUDA paths for nvcc (needed during flashinfer compilation)
+ENV PATH=/usr/local/cuda-13.0/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH
+
+# Install uv and SGLang as modelrunner user
+RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
+    && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/sglang-env \
+    && ~/.local/bin/uv pip install --python /opt/sglang-env/bin/python "sglang==${SGLANG_VERSION}"
+
+RUN /opt/sglang-env/bin/python -c "import sglang; print(sglang.__version__)" > /opt/sglang-env/version
 FROM llamacpp AS final-llamacpp
 # Copy the built binary from builder
 COPY --from=builder /app/model-runner /app/model-runner
 
 FROM vllm AS final-vllm
 # Copy the built binary from builder
 COPY --from=builder /app/model-runner /app/model-runner
+
+FROM sglang AS final-sglang
+# Copy the built binary from builder-sglang (without vLLM)
+COPY --from=builder-sglang /app/model-runner /app/model-runner