-ARG CUDA_VERSION=12.8.1
+ARG CUDA_VERSION=12.9.1
 ARG PYTHON_VERSION=3.12

 # BUILD_BASE_IMAGE: used to set up Python and build the xformers and vllm wheels. It can be replaced with a different base image from the local machine,
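The CUDA_VERSION bump above propagates into the base image tags used later in the file; as a rough sketch of how such a global ARG is typically consumed (the nvidia/cuda image name and tag layout here are assumptions, not shown in this diff):

    ARG CUDA_VERSION=12.9.1
    # global ARG interpolated into the FROM tag (hypothetical base image)
    FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base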
@@ -124,7 +124,7 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH'
     git clone https://github.com/facebookresearch/xformers.git

     pushd xformers
-    git checkout v0.0.32.post2
+    git checkout v0.0.33.post1
     git submodule update --init --recursive
     python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose
     popd
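A quick way to confirm the new pin took effect is to inspect the wheel produced in xformers-dist and the importable version; a hypothetical sanity check, not part of this Dockerfile:

    ls xformers-dist/    # expect an xformers-0.0.33.post1*.whl
    python3 -c 'import xformers; print(xformers.__version__)'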
@@ -256,7 +256,7 @@ ENV UV_INDEX_STRATEGY="unsafe-best-match"
 # Use copy mode to avoid hardlink failures with Docker cache mounts
 ENV UV_LINK_MODE=copy

-# Install build and runtime dependencies, this is needed for flashinfer install
+# Install build and runtime dependencies
 COPY requirements/build.txt requirements/build.txt
 COPY use_existing_torch.py use_existing_torch.py
 RUN python3 use_existing_torch.py
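For context, UV_LINK_MODE=copy (set above) is the environment equivalent of uv's --link-mode flag: hardlinks fail when the cache mount and the install target live on different filesystems, so copy mode sidesteps that. A sketch of the per-command form, assuming the same cache mount:

    RUN --mount=type=cache,target=/root/.cache/uv \
        uv pip install --system --link-mode=copy -r requirements/build.txt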
@@ -294,33 +294,9 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 RUN --mount=type=cache,target=/root/.cache/uv \
     uv pip install --system /wheels/xformers/*.whl --verbose

-# Build FlashInfer from source
-ARG torch_cuda_arch_list='8.0;8.9;9.0a;10.0a;12.0'
-ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
-
-# TODO(elainewy): remove this once vllm commit is updated, and install flashinfer from pip
-# see https://github.com/pytorch/pytorch/pull/165274#issuecomment-3408531784
-ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git"
-ARG FLASHINFER_GIT_REF="v0.2.14.post1"
-
-RUN --mount=type=cache,target=/root/.cache/uv \
-    git clone --depth 1 --recursive --shallow-submodules \
-        --branch ${FLASHINFER_GIT_REF} \
-        ${FLASHINFER_GIT_REPO} flashinfer \
-    && echo "Building FlashInfer with AOT for arches: ${torch_cuda_arch_list}" \
-    && cd flashinfer \
-    && python3 -m flashinfer.aot \
-    && python3 -m build --no-isolation --wheel --outdir ../wheels/flashinfer \
-    && cd .. \
-    && rm -rf flashinfer
-
-# Install FlashInfer
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install --system wheels/flashinfer/*.whl --verbose
-
 # Logging to confirm the torch versions
-RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer'
-RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt
+RUN pip freeze | grep -E 'torch|xformers|vllm'
+RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm' > build_summary.txt
 # ################## VLLM INSTALLED IMAGE ####################


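Per the removed TODO, the source build was a stopgap until the pinned vllm commit allows installing FlashInfer from pip; the eventual replacement would presumably look like the sketch below (package name taken from the export stage, version pin assumed from the removed git ref):

    RUN --mount=type=cache,target=/root/.cache/uv \
        uv pip install --system flashinfer-python==0.2.14.post1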
@@ -331,4 +307,3 @@ FROM scratch as export-wheels
 COPY --from=base /workspace/xformers-dist /wheels/xformers
 COPY --from=build /workspace/vllm-dist /wheels/vllm
 COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt
-COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python
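Since export-wheels is a scratch stage, its contents are retrieved with a local build output rather than by running a container; a typical invocation (target name from the diff, destination path assumed):

    docker buildx build --target export-wheels \
        --output type=local,dest=./wheels .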