Skip to content

Commit e545421

Browse files
author
niushengxiao
committed
feat: add dockerfiles for multi level cache
1 parent 54e2315 commit e545421

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# CUDA release used to pick the NVIDIA base image tag (cuDNN devel, Ubuntu 22.04).
ARG CUDA_VERSION=12.8.0
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04

# Build-time knobs used by the conda bootstrap steps further down:
#   PYTHON_VERSION  - interpreter version installed into /opt/conda
#   MAMBA_VERSION   - Mambaforge installer release to download
#   TARGETPLATFORM  - platform string (e.g. linux/arm64) used to select the
#                     installer architecture
ARG PYTHON_VERSION=3.10
ARG MAMBA_VERSION=24.7.1-0
ARG TARGETPLATFORM

# Put the conda installation first on PATH and record its prefix.
ENV CONDA_PREFIX=/opt/conda \
    PATH=/opt/conda/bin:${PATH}
10+
11+
# Install the base toolchain (compiler, make, git, curl, TLS headers and CA
# certificates) used by the later build steps.
#
# /tmp is set to the conventional mode 1777 (world-writable with the sticky
# bit) instead of a recursive 777: the recursive form clears the sticky bit,
# so any user could delete or replace other users' temporary files.
#
# NOTE(review): --allow-insecure-repositories lets apt continue when repository
# metadata is not signed. It is kept for compatibility with the original build;
# confirm it is still required and drop it if not.
RUN chmod 1777 /tmp && \
    apt-get update --allow-insecure-repositories && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        g++ \
        git \
        libssl-dev \
        make && \
    rm -rf /var/lib/apt/lists/*
19+
20+
# Download the Mambaforge installer for the target architecture (aarch64 for
# linux/arm64, x86_64 for everything else), install it in batch mode into
# /opt/conda, and delete the installer script in the same layer.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then \
        installer_arch=aarch64; \
    else \
        installer_arch=x86_64; \
    fi && \
    curl -fsSL -v \
        -o ~/mambaforge.sh \
        "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${installer_arch}.sh" && \
    bash ~/mambaforge.sh -b -p /opt/conda && \
    rm ~/mambaforge.sh
27+
28+
# Update conda, then install the requested Python version and boost into
# /opt/conda, and finally purge conda's caches.
# The build is aborted (exit 1) when targeting linux/arm64.
RUN if [ "${TARGETPLATFORM}" = "linux/arm64" ]; then exit 1; fi && \
    /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" && \
    /opt/conda/bin/conda install -y boost && \
    /opt/conda/bin/conda clean -ya
35+
36+
37+
WORKDIR /root

# Install the project's Python requirements. Only requirements.txt is copied
# here so this layer is independent of the rest of the source tree. Every pip
# step shares a BuildKit cache mount for pip's download cache.
COPY ./requirements.txt /lightllm/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        -r /lightllm/requirements.txt \
        --ignore-installed \
        --extra-index-url https://download.pytorch.org/whl/cu128

# Pre-release vLLM from the nightly wheel index.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install vllm --pre \
        --extra-index-url https://wheels.vllm.ai/nightly

# LightKernel and LightMem, each pinned to a commit and installed without
# pulling in their declared dependencies.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --no-deps -v \
        'git+https://github.com/ModelTC/LightKernel.git@07f2f62af5deb41f10a22660f9f42dba9273361e#egg=lightllm_kernel'
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install --no-deps -v \
        'git+https://github.com/ModelTC/LightMem.git@5900baf92d85ef4dbda6124093506b0af906011a#egg=light_mem'
45+
46+
# Debian packaging tools plus RDMA / InfiniBand userspace libraries and
# headers, used by the gdrcopy, NVSHMEM and UCX builds below.
#
# `apt-get update` and every `apt-get install` run in ONE layer so the install
# never depends on package lists cached by an earlier layer, and the lists are
# removed in the same layer to keep them out of the image.
# Recommended packages are intentionally still installed, as in the original
# build. NOTE(review): consider --no-install-recommends once it is confirmed
# the Debian package build does not rely on them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        debhelper \
        devscripts \
        dh-make \
        dkms \
        ibverbs-providers \
        infiniband-diags \
        libibverbs-dev \
        libnuma-dev \
        librdmacm-dev \
        perftest \
        rdma-core \
        wget && \
    rm -rf /var/lib/apt/lists/*
48+
49+
# Locations of the CUDA toolkit and of the gdrdrv 2.4.4 sources.
ENV GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/ \
    CUDA_HOME=/usr/local/cuda

# Build the gdrcopy v2.4.4 Debian packages from source, install them with
# dpkg, and remove the checkout in the same layer.
RUN mkdir -p /tmp/gdrcopy && \
    git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy && \
    cd /tmp/gdrcopy/packages && \
    CUDA=/usr/local/cuda ./build-deb-packages.sh && \
    dpkg -i \
        gdrdrv-dkms_*.deb \
        libgdrapi_*.deb \
        gdrcopy-tests_*.deb \
        gdrcopy_*.deb && \
    cd / && \
    rm -rf /tmp/gdrcopy
58+
59+
# DeepEP IBGDA fix: expose libmlx5.so.1 under the unversioned name libmlx5.so.
# The path is x86_64-specific.
RUN ln -sf \
        /usr/lib/x86_64-linux-gnu/libmlx5.so.1 \
        /usr/lib/x86_64-linux-gnu/libmlx5.so
61+
62+
# Build NVSHMEM 3.3.9 from source with IBGDA and GDRCopy enabled (SHMEM, UCX,
# NCCL, MPI and PMIx support disabled) and install it under
# /root/nvshmem/install.
#
# Build knobs (defaults reproduce the previously hard-coded values):
#   NVSHMEM_CUDA_ARCHITECTURES - value passed to CMAKE_CUDA_ARCHITECTURES
#   NVSHMEM_BUILD_JOBS         - parallel job count for the build; lower it on
#                                hosts where 64 parallel compilers do not fit
#                                in memory
#
# The tarball is downloaded into the current WORKDIR and removed via the same
# relative path, so the cleanup does not depend on WORKDIR being /root.
# NOTE(review): the download is not checksum-verified; add a sha256 check once
# the expected digest is known.
ARG NVSHMEM_CUDA_ARCHITECTURES=90
ARG NVSHMEM_BUILD_JOBS=64
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
    tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
    rm -f nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
    mv nvshmem_src nvshmem && \
    cd nvshmem && \
    NVSHMEM_SHMEM_SUPPORT=0 \
    NVSHMEM_UCX_SUPPORT=0 \
    NVSHMEM_USE_NCCL=0 \
    NVSHMEM_MPI_SUPPORT=0 \
    NVSHMEM_IBGDA_SUPPORT=1 \
    NVSHMEM_PMIX_SUPPORT=0 \
    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
    NVSHMEM_USE_GDRCOPY=1 \
    cmake -S . -B build/ \
        -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install \
        -DCMAKE_CUDA_ARCHITECTURES="${NVSHMEM_CUDA_ARCHITECTURES}" && \
    cmake --build build --target install -j"${NVSHMEM_BUILD_JOBS}"
76+
77+
# Fetch DeepEP at a pinned commit (overridable via --build-arg DEEPEP_COMMIT).
ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
RUN git clone https://github.com/deepseek-ai/DeepEP.git && \
    git -C DeepEP checkout ${DEEPEP_COMMIT}

# Build and install DeepEP against the NVSHMEM install tree. NVSHMEM_DIR is
# exported through ENV, so the setup.py step below sees it without an inline
# assignment.
WORKDIR /root/DeepEP
ENV NVSHMEM_DIR=/root/nvshmem/install
RUN python setup.py install
83+
84+
# Build UCX (branch v1.19.x) from source with CUDA, verbs, GDRCopy and EFA
# support, and install it with stripped binaries.
#
# Every step is chained with `&&`: previously a `;` after the apt-get reinstall
# let the build continue even when package installation failed. The compile
# uses -j"$(nproc)" rather than an unbounded `make -j`, which starts one job
# per target with no limit and can exhaust memory.
# /usr/lib/ucx and /opt/hpcx/ucx are removed (if present) before building.
# NOTE(review): v1.19.x is a moving branch, not a fixed tag; pin a tag or
# commit for reproducible images.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        automake \
        autotools-dev \
        cmake \
        libtool \
        libz-dev && \
    DEBIAN_FRONTEND=noninteractive apt-get -y install --reinstall \
        ibverbs-utils \
        libibumad-dev \
        libibverbs-dev \
        rdma-core && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /usr/lib/ucx /opt/hpcx/ucx && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout v1.19.x && \
    ./autogen.sh && \
    ./configure \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs=yes \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --with-efa \
        --enable-mt && \
    make -j"$(nproc)" && \
    make -j"$(nproc)" install-strip && \
    ldconfig
108+
109+
# Build NIXL with meson/ninja, install it under /usr/local/nixl, then install
# its Python package without dependencies.
#
# Every step is chained with `&&`: previously `;` separators let the build
# continue after a failed apt-get install, cd or pip install.
#
# NIXL_REF selects the branch or tag passed to `git clone -b`; the default
# `main` matches the previous behaviour.
# NOTE(review): `main` is a moving target; pass a release tag via
# --build-arg NIXL_REF=<tag> for reproducible images.
ARG NIXL_REF=main
RUN apt-get update && \
    apt-get install -y \
        libaio-dev \
        net-tools \
        pkg-config \
        tmux && \
    rm -rf /var/lib/apt/lists/* && \
    pip install --upgrade meson pybind11 patchelf && \
    cd /usr/local/src && \
    git clone https://github.com/ai-dynamo/nixl.git -b "${NIXL_REF}" && \
    cd nixl && \
    rm -rf build && \
    mkdir build && \
    meson setup build/ --prefix=/usr/local/nixl --buildtype=release && \
    cd build && \
    ninja && \
    ninja install && \
    cd .. && \
    pip install . --no-deps
121+
122+
# Copy the full source tree last so source edits only invalidate these final
# layers, then install the project in editable mode without keeping pip's cache.
COPY . /lightllm
RUN pip install --no-cache-dir -e /lightllm

0 commit comments

Comments
 (0)