@@ -103,37 +103,53 @@ USER modelrunner
103103
104104# Install uv and vLLM as modelrunner user
105105RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
106- && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
107- && if [ "$TARGETARCH" = "amd64" ]; then \
108- WHEEL_ARCH="manylinux_2_31_x86_64" ; \
109- WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl" ; \
110- ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL" ; \
106+ && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/vllm-env \
107+ && if [ "$TARGETARCH" = "amd64" ]; then \
108+ WHEEL_ARCH="manylinux_2_31_x86_64" ; \
109+ WHEEL_URL="https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}%2B${VLLM_CUDA_VERSION}-${VLLM_PYTHON_TAG}-${WHEEL_ARCH}.whl" ; \
110+ ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "$WHEEL_URL" ; \
111111 else \
112- ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}" ; \
112+ ~/.local/bin/uv pip install --python /opt/vllm-env/bin/python "vllm==${VLLM_VERSION}" ; \
113113 fi
114114
115115RUN /opt/vllm-env/bin/python -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version
116116
117117# --- SGLang variant ---
118118FROM llamacpp AS sglang
119119
120- ARG SGLANG_VERSION=0.4.0
120+ ARG SGLANG_VERSION=0.5.6
121121
122122USER root
123123
124- RUN apt update && apt install -y python3 python3-venv python3-dev curl ca-certificates build-essential && rm -rf /var/lib/apt/lists/*
124+ # Install CUDA toolkit 13 for nvcc (needed for flashinfer JIT compilation)
125+ # CUDA 13 supports Blackwell (B200) architecture with compute_100a
126+ # and Hopper (H200) architecture
127+ # Plus other SGLang dependencies
128+ RUN apt update && apt install -y \
129+ python3 python3-venv python3-dev \
130+ curl ca-certificates build-essential \
131+ libnuma1 libnuma-dev numactl ninja-build \
132+ wget gnupg \
133+ && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
134+ && dpkg -i cuda-keyring_1.1-1_all.deb \
135+ && apt update && apt install -y cuda-toolkit-13-0 \
136+ && rm cuda-keyring_1.1-1_all.deb \
137+ && rm -rf /var/lib/apt/lists/*
125138
126139RUN mkdir -p /opt/sglang-env && chown -R modelrunner:modelrunner /opt/sglang-env
127140
128141USER modelrunner
129142
143+ # Set CUDA paths for nvcc (needed during flashinfer compilation)
144+ ENV PATH=/usr/local/cuda-13.0/bin:$PATH
145+ ENV LD_LIBRARY_PATH=/usr/local/cuda-13.0/lib64:$LD_LIBRARY_PATH
146+
130147# Install uv and SGLang as modelrunner user
131148RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
132- && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/sglang-env \
133- && ~/.local/bin/uv pip install --python /opt/sglang-env/bin/python "sglang==${SGLANG_VERSION}"
149+ && ~/.local/bin/uv venv --python /usr/bin/python3 /opt/sglang-env \
150+ && ~/.local/bin/uv pip install --python /opt/sglang-env/bin/python "sglang==${SGLANG_VERSION}"
134151
135152RUN /opt/sglang-env/bin/python -c "import sglang; print(sglang.__version__)" > /opt/sglang-env/version
136-
137153FROM llamacpp AS final-llamacpp
138154# Copy the built binary from builder
139155COPY --from=builder /app/model-runner /app/model-runner
0 commit comments