@@ -59,7 +59,7 @@ ENV NVIDIA_VISIBLE_DEVICES=all \
     CUDA_HOME=/usr/local/cuda \
     PATH=/usr/local/cuda/bin:$PATH \
     LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH \
-    TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0 " \
+    TORCH_CUDA_ARCH_LIST="8.6 " \
     XLA_FLAGS=--xla_gpu_cuda_data_dir=/usr/local/cuda
 
 # ###############################################################################
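
The change above narrows TORCH_CUDA_ARCH_LIST to compute capability 8.6 only (Ampere-class GPUs such as the A40 or RTX 30xx), which shortens the CUDA extension builds but drops kernels for 8.0 (A100), 8.9 (Ada), and 9.0 (Hopper) devices. A minimal sketch, assuming PyTorch is available inside the running image (not part of this diff), for checking that the host GPU matches the single remaining architecture:

import torch

# Compute capability of the first visible GPU, e.g. (8, 6) on an A40 / RTX 30xx
major, minor = torch.cuda.get_device_capability(0)
print(f"device compute capability: {major}.{minor}")

# Architectures the installed torch wheel itself was compiled for, e.g. ['sm_80', 'sm_86', ...]
print("torch built for:", torch.cuda.get_arch_list())
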
@@ -176,19 +176,19 @@ RUN pip install --retries 5 --timeout 300 --no-cache-dir \
 # Copy requirements-special.txt for installation
 COPY --chown=1001:0 requirements-special.txt /tmp/deps/
 
+# To avoid out-of-memory during CUDA extension builds, cap parallel jobs
+# These environment variables are respected by PyTorch/ninja/CMake builds
+ENV MAX_JOBS=4 \
+    CMAKE_BUILD_PARALLEL_LEVEL=4 \
+    NINJA_FLAGS=-j4
+
 # 1. Flash Attention (standalone, needs --no-build-isolation --no-deps)
 RUN pip install --no-build-isolation --no-cache-dir --no-deps \
     $(grep "^flash-attn" /tmp/deps/requirements-special.txt)
 
 # 2. Mamba SSM dependencies (order matters!)
 # - causal-conv1d first (needs --no-build-isolation)
 # - mamba-ssm second (needs --no-build-isolation --no-deps)
-#
-# To avoid out-of-memory during CUDA extension builds, cap parallel jobs.
-# These environment variables are respected by PyTorch/ninja/CMake builds.
-ENV MAX_JOBS=4 \
-    CMAKE_BUILD_PARALLEL_LEVEL=4 \
-    NINJA_FLAGS=-j4
 RUN pip install --no-build-isolation --no-cache-dir \
     $(grep "^causal-conv1d" /tmp/deps/requirements-special.txt) \
     && pip install --no-build-isolation --no-cache-dir --no-deps \
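
After the image builds, a quick smoke test that the source-built packages from requirements-special.txt compiled and import cleanly under the capped parallelism is a reasonable follow-up. This is a hedged sketch: the module names below are the usual import names for flash-attn, causal-conv1d, and mamba-ssm, and may differ for other pinned versions.

import importlib

# Usual import names for flash-attn, causal-conv1d and mamba-ssm (assumed here,
# not taken from the diff); adjust if the pinned versions ship different modules
for mod in ("flash_attn", "causal_conv1d", "mamba_ssm"):
    pkg = importlib.import_module(mod)
    print(mod, getattr(pkg, "__version__", "version unknown"))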