Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 64 additions & 11 deletions sd-finetuner/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,20 +1,73 @@
# syntax=docker/dockerfile:1.4

# Base image used by both the builder stage and the final stage.
# Override with `--build-arg BASE_IMAGE=...` to build against another image.
ARG BASE_IMAGE=ghcr.io/coreweave/ml-containers/torch:es-fa3-te-update-1b2ab29-nccl-cuda12.8.1-ubuntu22.04-nccl2.27.5-1-torch2.7.1-vision0.22.1-audio2.7.1-abi1
# Dependencies requiring NVCC are built ahead of time in a separate stage
# so that the ~2 GiB dev library installations don't have to be included
# in the final finetuner image.
# gcc/g++ do not need to be installed here, but they improve the build.
# gfortran is just for compiler_wrapper.f95.
FROM ${BASE_IMAGE} AS builder

# Build-time toolchain for the builder stage: the CUDA 12.8 compiler and
# development packages, the host compilers, and Ninja. The apt package index
# is removed in the same layer that downloaded it.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        cuda-nvcc-12-8 \
        cuda-nvml-dev-12-8 \
        cuda-profiler-api-12-8 \
        g++ \
        gcc \
        gfortran \
        libcublas-dev-12-8 \
        libcurand-dev-12-8 \
        libcusolver-dev-12-8 \
        libcusparse-dev-12-8 \
        ninja-build \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create and pre-compile wheels for DeepSpeed.
# WORKDIR creates /wheels when it does not exist, so no separate
# `RUN mkdir` layer is needed.
WORKDIR /wheels
COPY requirements-precompilable.txt .
# The variable assignments in front of `python3 -m pip wheel` are set only in
# that command's environment; they are not persisted into the image.
RUN python3 -m pip install -U --no-cache-dir wheel && \
    MAX_JOBS=16 DS_BUILD_UTILS=1 DS_BUILD_CPU_ADAM=1 \
    TORCH_CUDA_ARCH_LIST="8.0 9.0+PTX" \
    python3 -m pip wheel \
        --no-cache-dir --no-build-isolation --no-deps \
        -r requirements-precompilable.txt

# Final image stage
FROM ${BASE_IMAGE}

# Install the OS packages the final image needs, in a single layer:
# `apt-get update` and `apt-get install` stay together so the install never
# runs against a stale or missing package index, and the index is removed in
# the same layer that created it.
# git is needed by the repository checkout step later in this stage.
# ca-certificates and openssh-client are listed explicitly because
# --no-install-recommends skips git's recommended packages, and the clone
# needs a working HTTPS/SSH transport.
# No CUDA development packages or build tools are installed here:
# NVCC-dependent builds happen in the builder stage so the dev libraries
# stay out of the final image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        libglib2.0-0 \
        openssh-client \
        wget && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# WORKDIR creates /app when it does not exist, so no separate
# `RUN mkdir` layer is needed.
WORKDIR /app

# Git ref (branch, tag, or commit) of the finetuner repository to check out.
ARG COMMIT=master
# Clone the finetuner repository, check out ${COMMIT}, copy the repository's
# top-level files into the working directory, and delete the clone in the
# same layer so it does not persist in the image.
# `git -C` is used instead of `cd finetuner ... cd ..`.
# NOTE(review): the clone uses an SSH URL, so the build presumably needs SSH
# credentials (e.g. `RUN --mount=type=ssh` plus a known_hosts entry) — confirm.
RUN git clone git@github.com:coreweave/finetuner && \
    git -C finetuner checkout "${COMMIT}" && \
    cp finetuner/* . && \
    rm -rf finetuner

# Install every wheel produced by the builder stage. /wheels is bind-mounted
# from that stage for the duration of this instruction only.
RUN --mount=type=bind,from=builder,target=/wheels,source=/wheels \
    pip3 install --no-cache-dir /wheels/*.whl

# Install the remaining Python dependencies. Both requirements files are
# copied into the working directory before pip runs.
COPY requirements.txt requirements-precompilable.txt ./
RUN pip3 install --no-cache-dir --requirement requirements.txt

# Copy application code
COPY ds_config.json evaluator.py finetuner.py inference.py utils.py ./

# Default command: run the finetuner script with the system Python.
# CMD is declared once, at the end of the stage; when a Dockerfile contains
# several CMD instructions only the last one takes effect.
CMD [ "/usr/bin/python3", "finetuner.py" ]
Loading