Commit 7e6c3e2

tweaks around flash attention
1 parent f52c9fc commit 7e6c3e2

File tree

1 file changed (+17, -2 lines)

  • images/universal/training/rocm64-torch290-py312


images/universal/training/rocm64-torch290-py312/Dockerfile

Lines changed: 17 additions & 2 deletions
@@ -100,6 +100,11 @@ RUN dnf install -y --setopt=install_weak_deps=False \
     cmake \
     git && dnf clean all && rm -rf /var/cache/dnf/*
 
+# Install ninja as root (critical for flash-attention, reduces build from hours to minutes)
+# ninja-build package not available in base repos, so install via pip
+RUN pip install --no-cache-dir ninja && \
+    ln -sf /usr/local/bin/ninja /usr/bin/ninja
+
 # Bundle RDMA runtime libs to a staging dir
 RUN mkdir -p /opt/rdma-runtime \
     && cp -a /usr/lib64/libibverbs* /opt/rdma-runtime/ || true \
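
The ninja step matters because a from-source flash-attention build goes through PyTorch's C++ extension machinery, which only parallelizes kernel compilation when ninja is detected; that is the savings the "hours to minutes" comment above refers to. A minimal sanity check one could add to this layer (illustrative only, not part of this commit) would fail the image build early if ninja is missing or not visible to torch:

# Illustrative check, not in this commit: confirm ninja is on PATH and that
# torch's extension builder can see it before the long kernel build starts.
RUN ninja --version && \
    python -c "import torch.utils.cpp_extension as ce; assert ce.is_ninja_available()"
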
@@ -143,12 +148,22 @@ RUN pip install --retries 5 --timeout 300 --no-cache-dir \
 # Install Flash Attention from original Dao-AILab repo
 # --no-build-isolation: Use already-installed torch instead of isolated env
 USER 0
-ENV GPU_ARCHS="gfx90a;gfx942"
+
+# Set build parallelism environment variables
+# MAX_JOBS: Controls PyTorch extension build parallelism
+# CMAKE_BUILD_PARALLEL_LEVEL: Controls CMake parallelism
+# NINJA_FLAGS: Controls ninja build parallelism
+# GPU_ARCHS: Target GPU architectures (gfx942=MI300, gfx90a=MI200/MI250)
+ENV GPU_ARCHS="gfx90a;gfx942" \
+    MAX_JOBS=12 \
+    CMAKE_BUILD_PARALLEL_LEVEL=12 \
+    NINJA_FLAGS=-j12
+
 RUN cd /tmp \
     && git clone --depth 1 --branch v2.8.2 https://github.com/Dao-AILab/flash-attention.git \
     && cd flash-attention \
     && git submodule update --init \
-    && MAX_JOBS="4" pip install --no-build-isolation --no-cache-dir --no-deps . \
+    && pip install --no-build-isolation --no-cache-dir --no-deps . \
     && cd / && rm -rf /tmp/flash-attention
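
Dropping the hard-coded MAX_JOBS="4" from the install line means the build now inherits MAX_JOBS=12 from the new ENV block, which PyTorch's extension builder consults when choosing how many parallel compile jobs to hand to ninja. A post-build smoke test (illustrative only, not part of this commit) could confirm the resulting package imports cleanly against the torch already in the image:

# Illustrative smoke test, not in this commit: verify the flash-attention build
# produced an importable package alongside the preinstalled torch.
RUN python -c "import flash_attn, torch; print(flash_attn.__version__, torch.__version__)"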