@@ -100,6 +100,11 @@ RUN dnf install -y --setopt=install_weak_deps=False \
     cmake \
     git && dnf clean all && rm -rf /var/cache/dnf/*
 
+# Install ninja as root (critical for flash-attention, reduces build from hours to minutes)
+# ninja-build package not available in base repos, so install via pip
+RUN pip install --no-cache-dir ninja && \
+    ln -sf /usr/local/bin/ninja /usr/bin/ninja
+
 # Bundle RDMA runtime libs to a staging dir
 RUN mkdir -p /opt/rdma-runtime \
     && cp -a /usr/lib64/libibverbs* /opt/rdma-runtime/ || true \
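Once the image is built, the ninja install from this hunk can be sanity-checked with a one-liner (a minimal sketch; the image tag rocm-pytorch:dev is a placeholder, not a name defined in this Dockerfile):

    docker run --rm rocm-pytorch:dev bash -c 'which ninja && ninja --version'
    # expected: the symlink /usr/bin/ninja (or /usr/local/bin/ninja) plus a version string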
@@ -143,12 +148,22 @@ RUN pip install --retries 5 --timeout 300 --no-cache-dir \
 # Install Flash Attention from original Dao-AILab repo
 # --no-build-isolation: Use already-installed torch instead of isolated env
 USER 0
-ENV GPU_ARCHS="gfx90a;gfx942"
+
+# Set build parallelism environment variables
+# MAX_JOBS: Controls PyTorch extension build parallelism
+# CMAKE_BUILD_PARALLEL_LEVEL: Controls CMake parallelism
+# NINJA_FLAGS: Controls ninja build parallelism
+# GPU_ARCHS: Target GPU architectures (gfx942=MI300, gfx90a=MI200/MI250)
+ENV GPU_ARCHS="gfx90a;gfx942" \
+    MAX_JOBS=12 \
+    CMAKE_BUILD_PARALLEL_LEVEL=12 \
+    NINJA_FLAGS=-j12
+
 RUN cd /tmp \
     && git clone --depth 1 --branch v2.8.2 https://github.com/Dao-AILab/flash-attention.git \
     && cd flash-attention \
     && git submodule update --init \
-    && MAX_JOBS= "4" pip install --no-build-isolation --no-cache-dir --no-deps . \
+    && pip install --no-build-isolation --no-cache-dir --no-deps . \
     && cd / && rm -rf /tmp/flash-attention
 
 
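To confirm that the parallelism settings are baked into the image via ENV and that flash-attention imports against the already-installed torch, something like the following works (again a sketch with the same placeholder image tag):

    docker run --rm rocm-pytorch:dev bash -c 'echo "$MAX_JOBS $CMAKE_BUILD_PARALLEL_LEVEL $NINJA_FLAGS $GPU_ARCHS"'
    docker run --rm rocm-pytorch:dev python -c "import flash_attn; print(flash_attn.__version__)"
    # expected version: 2.8.2, matching the checked-out tag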