Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
048a0e8
Update offline-data-preprocessing.md (#589)
dushyantbehl Jul 24, 2025
7258505
use default dataprocessor if one is not provided (#590)
dushyantbehl Jul 28, 2025
5d49c76
fix data processing job exiting with failure even though successful (…
dushyantbehl Aug 4, 2025
ef11bd1
fix: Disable caching in transformers via `use_cache` flag to avoid un…
romitjain Aug 4, 2025
5951c9a
fix: Rank is set to zero by default (#594)
seshapad Aug 4, 2025
9c7858e
upgrade trl (#601)
dushyantbehl Aug 29, 2025
607984c
Add changes to support granite 4 models (#599)
YashasviChaurasia Sep 1, 2025
7e261d2
feat: Support gpt-oss class of models with flash attention 3 support …
dushyantbehl Sep 3, 2025
bc39f95
feat: Restructure README (#598)
dushyantbehl Sep 4, 2025
f41eb2c
fix: add default optim arg in training arg (#607)
YashasviChaurasia Sep 15, 2025
8232e63
fix typo in dockerfile
dushyantbehl Sep 16, 2025
47e80ce
fix typo which ignored qlora config
dushyantbehl Sep 16, 2025
b6aa877
fix typos in data handler names
dushyantbehl Sep 16, 2025
2949a3a
Merge pull request #610 from dushyantbehl/main
dushyantbehl Sep 16, 2025
abeb12c
fix: subclass Lora config from upstream peft.LoraConfig (#609)
romitjain Sep 26, 2025
384d424
fix: update fms-accel to main (#608)
YashasviChaurasia Sep 26, 2025
d6dc4c9
Update advanced-data-preprocessing.md (#613)
dushyantbehl Sep 27, 2025
d9ee35f
feat: add ckpt conversion script fp32-bf16 (#614)
YashasviChaurasia Sep 30, 2025
f337875
feat: Allow chat template to be specified via a path in data config. …
YashasviChaurasia Oct 6, 2025
4ec1340
feat: add online data mixing plugin (#612)
kmehant Oct 8, 2025
452c13b
feat: Adopt resumption feature of online data mixing (#617)
kmehant Oct 9, 2025
5187516
feat: Alora migration to PEFT upstream. (#618)
YashasviChaurasia Oct 10, 2025
ebf5743
feat: ensure fms-acceleration callbacks run before TrainerController …
YashasviChaurasia Oct 27, 2025
6f2134d
fix: add on_init_end before adding tc callback (#621)
YashasviChaurasia Oct 28, 2025
7ff1ce9
fix: add hf compatible path update (#622)
YashasviChaurasia Oct 28, 2025
3344193
fix: avoid updating path kwarg (#624)
YashasviChaurasia Oct 28, 2025
1e844d3
fix: directly save final ckpt in save_model_dir (#626)
YashasviChaurasia Nov 5, 2025
faea400
Add free up disk space to gh runners (#628)
dushyantbehl Nov 7, 2025
2706020
fix: image build failure due to flash attn (#629)
dushyantbehl Nov 7, 2025
8c7566f
Merge tag 'v3.1.0-rc3' into v3.1.0-rc3
dushyantbehl Nov 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/build-and-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:

steps:
- uses: actions/checkout@v4
- name: "Free up disk space"
uses: ./.github/actions/free-up-disk-space
- name: Set up Python ${{ matrix.python-version.setup }}
uses: actions/setup-python@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Free up disk space"
uses: ./.github/actions/free-up-disk-space
- name: Set up Python 3.12
uses: actions/setup-python@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: "Free up disk space"
uses: ./.github/actions/free-up-disk-space
- name: Set up Python 3.12
uses: actions/setup-python@v4
with:
Expand Down
1,108 changes: 50 additions & 1,058 deletions README.md

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions build/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ ARG PYTHON_VERSION=3.12
ARG WHEEL_VERSION=""
## Enable Aimstack or MLflow if requested via ENABLE_AIM/MLFLOW set to "true"
ARG ENABLE_AIM=false
ARG ENABLE_ALORA=false
ARG ENABLE_MLFLOW=false
ARG ENABLE_FMS_ACCELERATION=true
ARG ENABLE_SCANNER=false
Expand Down Expand Up @@ -127,7 +126,6 @@ ARG USER_UID
ARG ENABLE_FMS_ACCELERATION
ARG ENABLE_AIM
ARG ENABLE_MLFLOW
ARG ENABLE_ALORA
ARG ENABLE_SCANNER
ARG ENABLE_CLEARML

Expand All @@ -151,33 +149,35 @@ RUN if [[ -z "${WHEEL_VERSION}" ]]; \
fi && \
ls /tmp/*.whl >/tmp/bdist_name

# Ensures to always build mamba_ssm from source
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm

# Install from the wheel
RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
python -m pip install --user wheel && \
python -m pip install --user "$(head bdist_name)" && \
python -m pip install --user "$(head bdist_name)[flash-attn]" && \
python -m pip install --user --no-build-isolation "$(head bdist_name)[mamba]"

RUN python -m pip install --user --no-build-isolation "$(head bdist_name)[flash-attn]"

# fms_acceleration_peft = PEFT-training, e.g., 4bit QLoRA
# fms_acceleration_foak = Fused LoRA and triton kernels
# fms_acceleration_aadp = Padding-Free Flash Attention Computation
# fms_acceleration_moe = Parallelized Mixture of Experts
# fms_acceleration_odm = Online Data Mixing
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
python -m pip install --user "$(head bdist_name)[fms-accel]"; \
python -m fms_acceleration.cli install fms_acceleration_peft; \
python -m fms_acceleration.cli install fms_acceleration_foak; \
python -m fms_acceleration.cli install fms_acceleration_aadp; \
python -m fms_acceleration.cli install fms_acceleration_moe; \
python -m fms_acceleration.cli install fms_acceleration_odm; \
fi

RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
python -m pip install --user "$(head bdist_name)[aim]"; \
fi

RUN if [[ "${ENABLE_ALORA}" == "true" ]]; then \
python -m pip install --user "$(head bdist_name)[activated-lora]"; \
fi

RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
python -m pip install --user "$(head bdist_name)[mlflow]"; \
fi
Expand Down Expand Up @@ -234,4 +234,4 @@ USER ${USER}
COPY --from=python-installations /home/${USER}/.local /home/${USER}/.local
ENV PYTHONPATH="/home/${USER}/.local/lib/python${PYTHON_VERSION}/site-packages"

CMD [ "python", "/app/accelerate_launch.py" ]
CMD [ "python", "/app/accelerate_launch.py" ]
93 changes: 93 additions & 0 deletions build/nvcr.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright The FMS HF Tuning Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Global Args #################################################################
## If the nvcr container is updated, ensure to check the torch and python
## installation version inside the dockerfile before pushing changes.
ARG NVCR_IMAGE_VERSION=25.02-py3

# This is based on what is inside the NVCR image already
ARG PYTHON_VERSION=3.12

## Base Layer ##################################################################
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION} AS dev

# The `if [[ ... ]]` conditionals below are bash syntax; the default RUN shell
# is /bin/sh (dash on Ubuntu-based NGC images), so use bash explicitly.
SHELL ["/bin/bash", "-c"]

ARG USER=root
ARG USER_UID=0
ARG WORKDIR=/app
ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning

# Feature toggles: each optional extra below is installed only when its
# corresponding flag is set to the string "true".
ARG ENABLE_FMS_ACCELERATION=true
ARG ENABLE_AIM=true
ARG ENABLE_MLFLOW=true
ARG ENABLE_SCANNER=true
ARG ENABLE_CLEARML=true
ARG ENABLE_TRITON_KERNELS=true
ARG ENABLE_MAMBA_SUPPORT=true

# Ensures to always build mamba_ssm from source
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm

RUN python -m pip install --upgrade pip

# upgrade torch as the base layer contains only torch 2.7
RUN pip install --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128

# Install main package + flash attention.
# Use WORKDIR, not `RUN cd`: a bare `cd` in its own RUN does not persist
# into later instructions (hadolint DL3003).
COPY . ${SOURCE_DIR}
WORKDIR ${SOURCE_DIR}
RUN pip install --no-cache-dir ${SOURCE_DIR} && \
    pip install --no-cache-dir ${SOURCE_DIR}[flash-attn]

# Optional extras (see the ENABLE_* build args above)
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[fms-accel] && \
        python -m fms_acceleration.cli install fms_acceleration_peft && \
        python -m fms_acceleration.cli install fms_acceleration_foak && \
        python -m fms_acceleration.cli install fms_acceleration_aadp && \
        python -m fms_acceleration.cli install fms_acceleration_moe && \
        python -m fms_acceleration.cli install fms_acceleration_odm; \
    fi

RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[aim]; \
    fi
RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[mlflow]; \
    fi
RUN if [[ "${ENABLE_SCANNER}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[scanner-dev]; \
    fi
RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \
    fi
RUN if [[ "${ENABLE_MAMBA_SUPPORT}" == "true" ]]; then \
        pip install --no-cache-dir ${SOURCE_DIR}[mamba]; \
    fi
RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \
        pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \
    fi

# Group-writable app dir and world-writable caches so the image can run
# under an arbitrary (e.g. OpenShift-assigned) UID.
RUN chmod -R g+rwX $WORKDIR /tmp
RUN mkdir -p /.cache && chmod -R 777 /.cache

# Set Triton environment variables for qLoRA
ENV TRITON_HOME="/tmp/triton_home"
ENV TRITON_DUMP_DIR="/tmp/triton_dump_dir"
ENV TRITON_CACHE_DIR="/tmp/triton_cache_dir"
ENV TRITON_OVERRIDE_DIR="/tmp/triton_override_dir"

WORKDIR $WORKDIR

# SOURCE_DIR is a build ARG and exec-form CMD performs no variable expansion,
# so persist the path as ENV and expand it through a shell; `exec` replaces
# the shell so python runs as PID 1 and receives SIGTERM on `docker stop`.
# (The original `CMD ["${SOURCE_DIR}/..."]` could never resolve at runtime
# and also invoked the .py file without an interpreter.)
ENV SOURCE_DIR=${SOURCE_DIR}
CMD ["/bin/bash", "-c", "exec python ${SOURCE_DIR}/build/accelerate_launch.py"]
Loading