Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 72 additions & 43 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -201,31 +201,38 @@ ARG GITHUB_ARTIFACTORY
ARG KTRANSFORMERS_VERSION
ARG KTRANSFORMERS_WHEEL
ARG FLASH_ATTN_WHEEL
ARG FUNCTIONALITY=sft

WORKDIR /workspace

# Create two conda environments with Python 3.12
# Create conda environments (fine-tune only needed for sft mode)
RUN conda create -n serve python=3.12 -y \
&& conda create -n fine-tune python=3.12 -y
&& if [ "$FUNCTIONALITY" = "sft" ]; then conda create -n fine-tune python=3.12 -y; fi

# Set pip mirror for both conda envs
# Set pip mirror for conda envs
RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
&& /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
&& if [ "$FUNCTIONALITY" = "sft" ]; then \
/opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \
fi

# Clone repositories
# Use kvcache-ai/sglang fork with kimi_k2 branch
RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
&& cd /workspace/sglang && git checkout kimi_k2

RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
&& git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
&& cd /workspace/ktransformers && git submodule update --init --recursive
# ktransformers (plus its submodules) is always cloned; LLaMA-Factory is
# cloned only when the image is built with FUNCTIONALITY=sft.
RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
    && cd /workspace/ktransformers \
    && git submodule update --init --recursive \
    && { \
        [ "$FUNCTIONALITY" != "sft" ] \
        || git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \
    }

# Download ktransformers wheel and flash_attn wheel for fine-tune env
RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
&& curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
# Download ktransformers wheel and flash_attn wheel for fine-tune env (sft mode only)
# Any FUNCTIONALITY value other than "sft" matches no arm, so nothing is
# downloaded and the step succeeds.
RUN case "$FUNCTIONALITY" in \
        sft) \
            curl --retry 3 --retry-delay 2 -fsSL \
                -o /workspace/${KTRANSFORMERS_WHEEL} \
                https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
            && curl --retry 3 --retry-delay 2 -fsSL \
                -o /workspace/${FLASH_ATTN_WHEEL} \
                https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL} \
            ;; \
    esac

########################################################
# Environment 1: serve (sglang + kt-kernel)
Expand Down Expand Up @@ -318,61 +325,78 @@ RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
&& CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build

########################################################
# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
# Environment 2: fine-tune (LLaMA-Factory + ktransformers) - sft mode only
########################################################

# Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
&& conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
# Guard form: the conda installs run only when FUNCTIONALITY is "sft";
# for any other value the test succeeds and the step is a no-op.
RUN [ "$FUNCTIONALITY" != "sft" ] || { \
        conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
        && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime; \
    }

# Install PyTorch 2.8 in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
esac \
&& /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
&& /opt/miniconda3/envs/fine-tune/bin/pip install \
torch==2.8.0 \
torchvision \
torchaudio \
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
# The whole PyTorch install is skipped unless FUNCTIONALITY is "sft".
if [ "$FUNCTIONALITY" = "sft" ]; then \
# Map CUDA_VERSION to the suffix appended to the PyTorch wheel index URL below.
# NOTE(review): there is no default arm, so any CUDA_VERSION not listed here
# leaves CUINDEX empty and the index URL ends in a bare "cu" - confirm whether
# an unsupported version should fail the build instead.
case "$CUDA_VERSION" in \
12.6.1) CUINDEX=126 ;; \
12.8.1) CUINDEX=128 ;; \
12.9.1) CUINDEX=129 ;; \
13.0.1) CUINDEX=130 ;; \
esac \
# Upgrade the packaging tools in the fine-tune env before installing torch.
&& /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling \
# torch is pinned to 2.8.0; torchvision and torchaudio are left unpinned.
&& /opt/miniconda3/envs/fine-tune/bin/pip install \
torch==2.8.0 \
torchvision \
torchaudio \
--extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}; \
fi

# Install LLaMA-Factory in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
cd /workspace/LLaMA-Factory \
&& /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
# Editable install of LLaMA-Factory into the fine-tune env, sft builds only.
[ "$FUNCTIONALITY" != "sft" ] || { \
    cd /workspace/LLaMA-Factory \
    && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation; \
}

# Install ktransformers wheel in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
# Install the ktransformers wheel into the fine-tune env, sft builds only.
[ "$FUNCTIONALITY" != "sft" ] \
|| /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}

# Install flash_attn wheel in fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
/opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
# Install the flash_attn wheel into the fine-tune env, sft builds only.
[ "$FUNCTIONALITY" != "sft" ] \
|| /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}

# Install NCCL for fine-tune env
RUN --mount=type=cache,target=/root/.cache/pip \
if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
/opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
# Pick the NCCL package by CUDA major version (the text before the first "."
# in CUDA_VERSION). Majors other than 12 and 13 install nothing.
if [ "$FUNCTIONALITY" = "sft" ]; then \
    case "${CUDA_VERSION%%.*}" in \
        12) /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ;; \
        13) /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ;; \
    esac; \
fi

########################################################
# Cleanup and final setup
########################################################

# Clean up downloaded wheels
RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
# The wheels are only downloaded for sft builds, so only remove them there.
# Paths are quoted and preceded by "--" so an unusual wheel file name cannot be
# word-split, glob-expanded, or parsed as an option to rm.
RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
        rm -f -- "/workspace/${KTRANSFORMERS_WHEEL}" "/workspace/${FLASH_ATTN_WHEEL}"; \
    fi

# Initialize conda for bash
RUN /opt/miniconda3/bin/conda init bash

# Create shell aliases for convenience
RUN echo '\n# Conda environment aliases\nalias serve="conda activate serve"\nalias finetune="conda activate fine-tune"' >> /root/.bashrc
# Append the convenience aliases to root's .bashrc. The "finetune" alias is
# added only for sft builds, where the fine-tune env exists.
# printf is used instead of echo because echo's handling of "\n" depends on
# which shell provides /bin/sh; printf always expands escapes in its format.
RUN printf '\n# Conda environment aliases\nalias serve="conda activate serve"\n' >> /root/.bashrc \
    && if [ "$FUNCTIONALITY" = "sft" ]; then \
        printf '%s\n' 'alias finetune="conda activate fine-tune"' >> /root/.bashrc; \
    fi

########################################################
# Extract version information for image naming
Expand All @@ -392,12 +416,17 @@ RUN set -x && \
echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
\
# LLaMA-Factory version (from fine-tune environment)
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
cd /workspace/LLaMA-Factory && \
LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
# LLaMA-Factory version (from fine-tune environment, sft mode only)
if [ "$FUNCTIONALITY" = "sft" ]; then \
. /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
cd /workspace/LLaMA-Factory && \
LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION"; \
else \
echo "LLAMAFACTORY_VERSION=none" >> /workspace/versions.env && \
echo "LLaMA-Factory not installed (infer mode)"; \
fi && \
\
# Display all versions
echo "=== Version Summary ===" && \
Expand Down
1 change: 1 addition & 0 deletions docker/build-docker-tar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ build_image() {
build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")

# Add proxy settings if provided
if [ -n "$HTTP_PROXY" ]; then
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ generate_image_name() {
llama_ver=$(echo "$versions" | grep "^LLAMAFACTORY_VERSION=" | cut -d= -f2)

# Validate versions were extracted
if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ] || [ -z "$llama_ver" ]; then
if [ -z "$sglang_ver" ] || [ -z "$ktrans_ver" ]; then
log_error "Failed to parse versions from input"
return 1
fi
Expand Down
2 changes: 2 additions & 0 deletions docker/push-to-dockerhub.sh
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ build_image() {
build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")

# Add proxy settings if provided
if [ -n "$HTTP_PROXY" ]; then
Expand Down Expand Up @@ -884,6 +885,7 @@ build_image() {
build_args+=("--build-arg" "UBUNTU_MIRROR=$UBUNTU_MIRROR")
build_args+=("--build-arg" "CPU_VARIANT=$CPU_VARIANT")
build_args+=("--build-arg" "BUILD_ALL_CPU_VARIANTS=1")
build_args+=("--build-arg" "FUNCTIONALITY=$FUNCTIONALITY")

# Add proxy settings if provided
if [ -n "$HTTP_PROXY" ]; then
Expand Down