# ################### BASE BUILD IMAGE ####################
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev

# Install only what the build needs and drop the apt lists in the same
# layer so they never persist in the image (hadolint DL3015/DL3009).
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        git \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-12.1/compat/

WORKDIR /workspace

# Copy only the build-requirements manifest first so this layer stays
# cached until requirements-build.txt itself changes; the BuildKit cache
# mount keeps pip's download cache on the build host, not in the image.
COPY requirements-build.txt requirements-build.txt
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install -r requirements-build.txt
# ################### BASE BUILD IMAGE ####################
16+
# ################### FLASH_ATTENTION Build IMAGE ####################
FROM dev AS flash-attn-builder
# Max parallel compile jobs for the flash-attn native build; kept low by
# default because the CUDA compile is memory hungry. Override with
# --build-arg max_jobs=N on hosts with plenty of RAM.
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# flash attention version (PEP 440 accepts the leading "v")
ARG flash_attn_version=v2.4.2
ENV FLASH_ATTN_VERSION=${flash_attn_version}

WORKDIR /usr/src/flash-attention-v2

# Download the wheel or build it if a pre-compiled release doesn't exist.
# --no-build-isolation reuses the torch already installed in the dev
# stage instead of pulling a fresh one into an isolated build env.
RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
    --no-build-isolation --no-deps --no-cache-dir
# ################### FLASH_ATTENTION Build IMAGE ####################
33+
# ################### Runtime IMAGE ####################
# NGC PyTorch image ships CUDA + torch preinstalled; pinned by tag.
FROM nvcr.io/nvidia/pytorch:23.09-py3

337RUN apt-get update \
@@ -7,6 +41,10 @@ RUN apt-get update \
741 && apt-get autoremove -y \
842 && rm -rf /var/lib/apt/lists/*
943
# Install flash attention (from pre-built wheel). The bind mount exposes
# the builder stage's wheel directory without copying it into a layer.
RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir

# Remove the base image's torch so the version pinned in requirements.txt
# is the one that ends up installed.
RUN pip uninstall torch -y
COPY requirements.txt /workspace/requirements.txt
# Absolute path instead of relying on the base image's implicit WORKDIR;
# --no-cache-dir keeps pip's download cache out of the layer (DL3042).
RUN pip install --no-cache-dir -r /workspace/requirements.txt
RUN wget https://github.com/peak/s5cmd/releases/download/v2.2.1/s5cmd_2.2.1_Linux-64bit.tar.gz
# Unpack the s5cmd binary (downloaded in the previous layer).
RUN tar -xvzf s5cmd_2.2.1_Linux-64bit.tar.gz

COPY vllm_server.py /workspace/vllm_server.py

# ################### Runtime IMAGE ####################