Skip to content

Commit 8c7566f

Browse files
committed
Merge tag 'v3.1.0-rc3' into v3.1.0-rc3
Signed-off-by: Dushyant Behl <[email protected]>
2 parents d8cb1cb + 2706020 commit 8c7566f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+2925
-1498
lines changed

.github/workflows/build-and-publish.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ jobs:
3535

3636
steps:
3737
- uses: actions/checkout@v4
38+
- name: "Free up disk space"
39+
uses: ./.github/actions/free-up-disk-space
3840
- name: Set up Python ${{ matrix.python-version.setup }}
3941
uses: actions/setup-python@v4
4042
with:

.github/workflows/coverage.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ jobs:
1010
runs-on: ubuntu-latest
1111
steps:
1212
- uses: actions/checkout@v4
13+
- name: "Free up disk space"
14+
uses: ./.github/actions/free-up-disk-space
1315
- name: Set up Python 3.12
1416
uses: actions/setup-python@v4
1517
with:

.github/workflows/format.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ jobs:
2525
runs-on: ubuntu-latest
2626
steps:
2727
- uses: actions/checkout@v4
28+
- name: "Free up disk space"
29+
uses: ./.github/actions/free-up-disk-space
2830
- name: Set up Python 3.12
2931
uses: actions/setup-python@v4
3032
with:

README.md

Lines changed: 50 additions & 1058 deletions
Large diffs are not rendered by default.

build/Dockerfile

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ ARG PYTHON_VERSION=3.12
2121
ARG WHEEL_VERSION=""
2222
## Enable Aimstack or MLflow if requested via ENABLE_AIM/MLFLOW set to "true"
2323
ARG ENABLE_AIM=false
24-
ARG ENABLE_ALORA=false
2524
ARG ENABLE_MLFLOW=false
2625
ARG ENABLE_FMS_ACCELERATION=true
2726
ARG ENABLE_SCANNER=false
@@ -127,7 +126,6 @@ ARG USER_UID
127126
ARG ENABLE_FMS_ACCELERATION
128127
ARG ENABLE_AIM
129128
ARG ENABLE_MLFLOW
130-
ARG ENABLE_ALORA
131129
ARG ENABLE_SCANNER
132130
ARG ENABLE_CLEARML
133131

@@ -151,33 +149,35 @@ RUN if [[ -z "${WHEEL_VERSION}" ]]; \
151149
fi && \
152150
ls /tmp/*.whl >/tmp/bdist_name
153151

152+
# Always build mamba_ssm from source instead of using a prebuilt wheel
153+
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm
154+
154155
# Install from the wheel
155156
RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
156157
python -m pip install --user wheel && \
157158
python -m pip install --user "$(head bdist_name)" && \
158-
python -m pip install --user "$(head bdist_name)[flash-attn]" && \
159159
python -m pip install --user --no-build-isolation "$(head bdist_name)[mamba]"
160160

161+
RUN python -m pip install --user --no-build-isolation "$(head bdist_name)[flash-attn]"
162+
161163
# fms_acceleration_peft = PEFT-training, e.g., 4bit QLoRA
162164
# fms_acceleration_foak = Fused LoRA and triton kernels
163165
# fms_acceleration_aadp = Padding-Free Flash Attention Computation
164166
# fms_acceleration_moe = Parallelized Mixture of Experts
167+
# fms_acceleration_odm = Online Data Mixing
165168
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
166169
python -m pip install --user "$(head bdist_name)[fms-accel]"; \
167170
python -m fms_acceleration.cli install fms_acceleration_peft; \
168171
python -m fms_acceleration.cli install fms_acceleration_foak; \
169172
python -m fms_acceleration.cli install fms_acceleration_aadp; \
170173
python -m fms_acceleration.cli install fms_acceleration_moe; \
174+
python -m fms_acceleration.cli install fms_acceleration_odm; \
171175
fi
172176

173177
RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
174178
python -m pip install --user "$(head bdist_name)[aim]"; \
175179
fi
176180

177-
RUN if [[ "${ENABLE_ALORA}" == "true" ]]; then \
178-
python -m pip install --user "$(head bdist_name)[activated-lora]"; \
179-
fi
180-
181181
RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
182182
python -m pip install --user "$(head bdist_name)[mlflow]"; \
183183
fi
@@ -234,4 +234,4 @@ USER ${USER}
234234
COPY --from=python-installations /home/${USER}/.local /home/${USER}/.local
235235
ENV PYTHONPATH="/home/${USER}/.local/lib/python${PYTHON_VERSION}/site-packages"
236236

237-
CMD [ "python", "/app/accelerate_launch.py" ]
237+
CMD [ "python", "/app/accelerate_launch.py" ]

build/nvcr.Dockerfile

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright The FMS HF Tuning Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
## Global Args #################################################################
16+
## If the NVCR container is updated, be sure to check the torch and python
17+
## installation versions inside the Dockerfile before pushing changes.
18+
ARG NVCR_IMAGE_VERSION=25.02-py3
19+
20+
# This is based on what is inside the NVCR image already
21+
ARG PYTHON_VERSION=3.12
22+
23+
## Base Layer ##################################################################
24+
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION} AS dev
25+
26+
ARG USER=root
27+
ARG USER_UID=0
28+
ARG WORKDIR=/app
29+
ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning
30+
31+
ARG ENABLE_FMS_ACCELERATION=true
32+
ARG ENABLE_AIM=true
33+
ARG ENABLE_MLFLOW=true
34+
ARG ENABLE_SCANNER=true
35+
ARG ENABLE_CLEARML=true
36+
ARG ENABLE_TRITON_KERNELS=true
37+
ARG ENABLE_MAMBA_SUPPORT=true
38+
39+
# Always build mamba_ssm from source instead of using a prebuilt wheel
40+
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm
41+
42+
# The conditional RUN steps further down use `[[ ... ]]`, which is a bash
# construct. A POSIX /bin/sh (e.g. dash) does not implement it: the test
# command is not found, the `if` evaluates false and the optional installs are
# skipped without failing the build. Select bash explicitly as the build shell.
SHELL ["/bin/bash", "-c"]

# Upgrade pip itself; --no-cache-dir keeps pip's download cache out of the layer.
RUN python -m pip install --no-cache-dir --upgrade pip
43+
44+
# upgrade torch as the base layer contains only torch 2.7
45+
# Force-reinstall torch and its companion packages from the CUDA 12.8 wheel
# index. --no-cache-dir prevents pip from storing the downloaded wheels in the
# image layer, consistent with the other installs in this file.
RUN pip install --no-cache-dir --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128
46+
47+
# Install main package + flash attention
48+
COPY . ${SOURCE_DIR}
49+
# NOTE: no separate `cd` step is used here. Every RUN starts a fresh shell, so a directory change would not persist; the installs address ${SOURCE_DIR} by absolute path.
50+
RUN pip install --no-cache-dir ${SOURCE_DIR} && \
51+
pip install --no-cache-dir ${SOURCE_DIR}[flash-attn]
52+
53+
# Optional extras
54+
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
55+
pip install --no-cache-dir ${SOURCE_DIR}[fms-accel] && \
56+
python -m fms_acceleration.cli install fms_acceleration_peft && \
57+
python -m fms_acceleration.cli install fms_acceleration_foak && \
58+
python -m fms_acceleration.cli install fms_acceleration_aadp && \
59+
python -m fms_acceleration.cli install fms_acceleration_moe && \
60+
python -m fms_acceleration.cli install fms_acceleration_odm; \
61+
fi
62+
63+
RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
64+
pip install --no-cache-dir ${SOURCE_DIR}[aim]; \
65+
fi
66+
RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
67+
pip install --no-cache-dir ${SOURCE_DIR}[mlflow]; \
68+
fi
69+
RUN if [[ "${ENABLE_SCANNER}" == "true" ]]; then \
70+
pip install --no-cache-dir ${SOURCE_DIR}[scanner-dev]; \
71+
fi
72+
RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \
73+
pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \
74+
fi
75+
RUN if [[ "${ENABLE_MAMBA_SUPPORT}" == "true" ]]; then \
76+
pip install --no-cache-dir ${SOURCE_DIR}[mamba]; \
77+
fi
78+
RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \
79+
pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \
80+
fi
81+
82+
RUN chmod -R g+rwX $WORKDIR /tmp
83+
RUN mkdir -p /.cache && chmod -R 777 /.cache
84+
85+
# Set Triton environment variables for qLoRA
86+
ENV TRITON_HOME="/tmp/triton_home"
87+
ENV TRITON_DUMP_DIR="/tmp/triton_dump_dir"
88+
ENV TRITON_CACHE_DIR="/tmp/triton_cache_dir"
89+
ENV TRITON_OVERRIDE_DIR="/tmp/triton_override_dir"
90+
91+
WORKDIR $WORKDIR
92+
93+
# Exec-form CMD performs no variable expansion, and build ARGs do not exist in
# the running container. Persist the source path as an environment variable and
# let a shell resolve it at start-up; `exec` replaces the shell so the Python
# process receives signals directly.
ENV SOURCE_DIR=${SOURCE_DIR}
CMD ["/bin/sh", "-c", "exec python \"${SOURCE_DIR}/build/accelerate_launch.py\""]

0 commit comments

Comments
 (0)