1- FROM nvcr.io/nvidia/tritonserver:24.04-py3-min as base
2- ARG PYTORCH_VERSION=2.6.0
3- ARG PYTHON_VERSION=3.9
4- ARG CUDA_VERSION=12.4
5- ARG MAMBA_VERSION=23.1.0-1
1+ ARG CUDA_VERSION=12.6.1
2+ FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
3+ ARG PYTHON_VERSION=3.10
4+ ARG MAMBA_VERSION=24.7.1-0
65ARG TARGETPLATFORM
7-
86ENV PATH=/opt/conda/bin:$PATH \
97 CONDA_PREFIX=/opt/conda
108
@@ -21,7 +19,7 @@ RUN case ${TARGETPLATFORM} in \
2119 "linux/arm64") MAMBA_ARCH=aarch64 ;; \
2220 *) MAMBA_ARCH=x86_64 ;; \
2321 esac && \
24- curl -fsSL -o ~/mambaforge.sh -v "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
22+ curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
2523 bash ~/mambaforge.sh -b -p /opt/conda && \
2624 rm ~/mambaforge.sh
2725
@@ -36,39 +34,46 @@ RUN case ${TARGETPLATFORM} in \
3634WORKDIR /root
3735
3836COPY ./requirements.txt /lightllm/requirements.txt
39- RUN pip install -r /lightllm/requirements.txt --no-cache-dir --ignore-installed --extra-index-url https://download.pytorch.org/whl/cu124
37+ RUN pip install -U pip
38+ RUN pip install -r /lightllm/requirements.txt --no-cache-dir
4039
41- RUN pip install --no-cache-dir https://github.com/ModelTC/flash-attn-3-build/releases/download/v2.7.4.post1/flash_attn-3.0.0b1-cp39-cp39-linux_x86_64.whl
40+ RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
4241
43- RUN pip install --no-cache-dir nvidia-nccl-cu12==2.25.1 # for allreduce hang issues in multinode H100
42+ RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
4443
45- RUN git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git
46- RUN cd DeepGEMM && python setup.py install
44+ RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
45+ RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
4746
48- WORKDIR /root
49- RUN git clone https://github.com/deepseek-ai/DeepEP.git
47+ ENV CUDA_HOME=/usr/local/cuda \
48+ GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
5049
51- # NVSHMEM
52- RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
53- RUN tar -xf nvshmem_src_3.2.5-1.txz \
54- && mv nvshmem_src nvshmem
50+ RUN mkdir -p /tmp/gdrcopy && cd /tmp \
51+ && git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
52+ && cd gdrcopy/packages \
53+ && CUDA=/usr/local/cuda ./build-deb-packages.sh \
54+ && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
55+ && cd / && rm -rf /tmp/gdrcopy
5556
56- WORKDIR /root/nvshmem
57- RUN git apply /root/DeepEP/third-party/nvshmem.patch
57+ # Fix DeepEP IBGDA symlink
58+ RUN ln -sf /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
5859
59- WORKDIR /root/nvshmem
60- ENV CUDA_HOME=/usr/local/cuda
61- RUN NVSHMEM_SHMEM_SUPPORT=0 \
60+ RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
61+ && tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
62+ && cd nvshmem \
63+ && rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
64+ && NVSHMEM_SHMEM_SUPPORT=0 \
6265 NVSHMEM_UCX_SUPPORT=0 \
6366 NVSHMEM_USE_NCCL=0 \
6467 NVSHMEM_MPI_SUPPORT=0 \
6568 NVSHMEM_IBGDA_SUPPORT=1 \
6669 NVSHMEM_PMIX_SUPPORT=0 \
6770 NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
6871 NVSHMEM_USE_GDRCOPY=1 \
69- cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 -DMLX5_lib=/usr/lib/x86_64-linux-gnu/libmlx5.so.1 \
70- && cd build \
71- && make install -j64
72+ cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
73+ && cmake --build build --target install -j64
74+
75+ ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
76+ RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
7277
7378WORKDIR /root/DeepEP
7479ENV NVSHMEM_DIR=/root/nvshmem/install
0 commit comments