Skip to content

Commit b3a8a78

Browse files
authored
chore: fix image build and push actions to automate image build process. (#650)
* change the image build and push workflows for various branches Signed-off-by: Dushyant Behl <[email protected]> * push staging images on tags/release; standardize image names; fix NVCR Dockerfile Signed-off-by: Dushyant Behl <[email protected]> * remove staging branch Signed-off-by: Dushyant Behl <[email protected]> * update NVCR Dockerfile to multi-stage to save space Signed-off-by: Dushyant Behl <[email protected]> --------- Signed-off-by: Dushyant Behl <[email protected]>
1 parent 13b05a9 commit b3a8a78

File tree

4 files changed

+119
-25
lines changed

4 files changed

+119
-25
lines changed

.github/workflows/image.yaml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
name: Image
22
on:
33
push:
4-
branches: [ "main", "release" ]
5-
pull_request:
6-
branches: [ "main", "release" ]
4+
branches: [ "main" ]
75

86
jobs:
97
build:
@@ -12,6 +10,15 @@ jobs:
1210
- uses: actions/checkout@v4
1311
- name: "Free up disk space"
1412
uses: ./.github/actions/free-up-disk-space
15-
- name: Build image
13+
- name: Build NVCR dev Image
1614
run: |
17-
docker build -t fms-hf-tuning:dev . -f build/Dockerfile
15+
docker build -t fms-hf-tuning:main-nvcr-latest . -f build/nvcr.Dockerfile
16+
- name: Login to Quay.io
17+
uses: docker/login-action@v3
18+
with:
19+
registry: quay.io
20+
username: ${{ secrets.QUAY_USERNAME }}
21+
password: ${{ secrets.QUAY_ROBOT_TOKEN }}
22+
- name: Push docker image for every commit to Quay.io as dev images
23+
run: |
24+
docker push ${{ env.QUAY_REPOSITORY }}/fms-hf-tuning:main-nvcr-latest
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: Image
2+
on:
3+
push:
4+
branches: [ "release" ]
5+
pull_request:
6+
branches: [ "release" ]
7+
8+
jobs:
9+
build:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
- name: "Free up disk space"
14+
uses: ./.github/actions/free-up-disk-space
15+
- name: Build UBI9 Prod Image
16+
run: |
17+
docker build \
18+
-t fms-hf-tuning:ubi9-latest \
19+
-t fms-hf-tuning:release-ubi9-latest \
20+
-f build/Dockerfile .
21+
- name: Login to Quay.io
22+
if: github.event_name == 'push'
23+
uses: docker/login-action@v3
24+
with:
25+
registry: quay.io
26+
username: ${{ secrets.QUAY_USERNAME }}
27+
password: ${{ secrets.QUAY_ROBOT_TOKEN }}
28+
- name: Push docker image to Quay.io
29+
if: github.event_name == 'push'
30+
run: |
31+
docker push ${{ env.QUAY_REPOSITORY }}/fms-hf-tuning:ubi9-latest
32+
docker push ${{ env.QUAY_REPOSITORY }}/fms-hf-tuning:release-ubi9-latest
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: dev-image
2+
on:
3+
push:
4+
tags:
5+
- 'v*.*.*'
6+
release:
7+
types: [published]
8+
9+
jobs:
10+
build:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v4
14+
- name: "Free up disk space"
15+
uses: ./.github/actions/free-up-disk-space
16+
- name: Determine image tag
17+
id: tag
18+
run: |
19+
if [ "${{ github.event_name }}" = "release" ]; then
20+
TAG="${{ github.event.release.tag_name }}"
21+
elif [ "${{ github.ref_type }}" = "tag" ]; then
22+
TAG="${GITHUB_REF_NAME}"
23+
else
24+
TAG="dev"
25+
fi
26+
echo "IMAGE_TAG=$TAG" >> $GITHUB_ENV
27+
- name: Build image
28+
run: |
29+
docker build \
30+
-t fms-hf-tuning:${IMAGE_TAG}-nvcr \
31+
-t fms-hf-tuning:staging-nvcr-latest \
32+
-f build/nvcr.Dockerfile .
33+
- name: Login to Quay.io
34+
uses: docker/login-action@v3
35+
with:
36+
registry: quay.io
37+
username: ${{ secrets.QUAY_USERNAME }}
38+
password: ${{ secrets.QUAY_ROBOT_TOKEN }}
39+
- name: Push docker image to Quay.io
40+
run: |
41+
docker push ${{ env.QUAY_REPOSITORY }}/fms-hf-tuning:staging-nvcr-latest
42+
docker push ${{ env.QUAY_REPOSITORY }}/fms-hf-tuning:${IMAGE_TAG}-nvcr

build/nvcr.Dockerfile

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,36 +20,36 @@ ARG NVCR_IMAGE_VERSION=25.02-py3
2020
# This is based on what is inside the NVCR image already
2121
ARG PYTHON_VERSION=3.12
2222

23-
## Base Layer ##################################################################
24-
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION} AS dev
23+
######################## BUILDER ########################
24+
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION} AS builder
2525

2626
ARG USER=root
2727
ARG USER_UID=0
2828
ARG WORKDIR=/app
2929
ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning
3030

3131
ARG ENABLE_FMS_ACCELERATION=true
32-
ARG ENABLE_AIM=true
33-
ARG ENABLE_MLFLOW=true
34-
ARG ENABLE_SCANNER=true
32+
ARG ENABLE_AIM=false
33+
ARG ENABLE_MLFLOW=false
34+
ARG ENABLE_SCANNER=false
3535
ARG ENABLE_CLEARML=true
3636
ARG ENABLE_TRITON_KERNELS=true
37-
ARG ENABLE_MAMBA_SUPPORT=true
3837

3938
# Ensure mamba_ssm is always built from source
4039
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm
4140

42-
RUN python -m pip install --upgrade pip
43-
4441
# upgrade torch as the base layer contains only torch 2.7
45-
RUN pip install --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128
42+
RUN python -m pip install --upgrade pip && \
43+
pip install --upgrade setuptools && \
44+
pip install --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128
4645

4746
# Install main package + flash attention
4847
COPY . ${SOURCE_DIR}
4948
RUN cd ${SOURCE_DIR}
5049

51-
RUN pip install --no-cache-dir ${SOURCE_DIR}
52-
RUN pip install --user --no-build-isolation ${SOURCE_DIR}[flash-attn]
50+
RUN pip install --no-cache-dir ${SOURCE_DIR} && \
51+
pip install --user --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
52+
pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[mamba]
5353

5454
# Optional extras
5555
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
@@ -61,6 +61,12 @@ RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
6161
python -m fms_acceleration.cli install fms_acceleration_odm; \
6262
fi
6363

64+
RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \
65+
pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \
66+
fi
67+
RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \
68+
pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \
69+
fi
6470
RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
6571
pip install --no-cache-dir ${SOURCE_DIR}[aim]; \
6672
fi
@@ -70,15 +76,22 @@ RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
7076
RUN if [[ "${ENABLE_SCANNER}" == "true" ]]; then \
7177
pip install --no-cache-dir ${SOURCE_DIR}[scanner-dev]; \
7278
fi
73-
RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \
74-
pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \
75-
fi
76-
RUN if [[ "${ENABLE_MAMBA_SUPPORT}" == "true" ]]; then \
77-
pip install --no-cache-dir ${SOURCE_DIR}[mamba]; \
78-
fi
79-
RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \
80-
pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \
81-
fi
79+
80+
# cleanup
81+
RUN rm -rf /root/.cache /tmp/* /opt/pytorch
82+
83+
######################## RUNTIME ########################
84+
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION}
85+
86+
WORKDIR ${WORKDIR}
87+
88+
# Copy only Python site-packages + app
89+
COPY --from=builder /usr/local/lib/python3.12/dist-packages \
90+
/usr/local/lib/python3.12/dist-packages
91+
COPY --from=builder ${SOURCE_DIR} ${SOURCE_DIR}
92+
93+
# Runtime cleanup
94+
RUN rm -rf /opt/pytorch /root/.cache /tmp/*
8295

8396
RUN chmod -R g+rwX $WORKDIR /tmp
8497
RUN mkdir -p /.cache && chmod -R 777 /.cache

0 commit comments

Comments
 (0)