@@ -201,31 +201,38 @@ ARG GITHUB_ARTIFACTORY
 ARG KTRANSFORMERS_VERSION
 ARG KTRANSFORMERS_WHEEL
 ARG FLASH_ATTN_WHEEL
+ARG FUNCTIONALITY=sft

 WORKDIR /workspace

-# Create two conda environments with Python 3.12
+# Create conda environments (fine-tune only needed for sft mode)
 RUN conda create -n serve python=3.12 -y \
-    && conda create -n fine-tune python=3.12 -y
+    && if [ "$FUNCTIONALITY" = "sft" ]; then conda create -n fine-tune python=3.12 -y; fi

-# Set pip mirror for both conda envs
+# Set pip mirror for conda envs
 RUN /opt/miniconda3/envs/serve/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple \
-    && /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple; \
+    fi

 # Clone repositories
 # Use kvcache-ai/sglang fork with kimi_k2 branch
 RUN git clone https://${GITHUB_ARTIFACTORY}/kvcache-ai/sglang.git /workspace/sglang \
     && cd /workspace/sglang && git checkout kimi_k2

-RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory \
-    && git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
-    && cd /workspace/ktransformers && git submodule update --init --recursive
+RUN git clone --depth 1 https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers.git /workspace/ktransformers \
+    && cd /workspace/ktransformers && git submodule update --init --recursive \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        git clone --depth 1 https://${GITHUB_ARTIFACTORY}/hiyouga/LLaMA-Factory.git /workspace/LLaMA-Factory; \
+    fi

-# Download ktransformers wheel and flash_attn wheel for fine-tune env
-RUN curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
-    && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
-    https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}
+# Download ktransformers wheel and flash_attn wheel for fine-tune env (sft mode only)
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${KTRANSFORMERS_WHEEL} \
+        https://${GITHUB_ARTIFACTORY}/kvcache-ai/ktransformers/releases/download/v${KTRANSFORMERS_VERSION}/${KTRANSFORMERS_WHEEL} \
+        && curl --retry 3 --retry-delay 2 -fsSL -o /workspace/${FLASH_ATTN_WHEEL} \
+        https://${GITHUB_ARTIFACTORY}/Dao-AILab/flash-attention/releases/download/v2.8.3/${FLASH_ATTN_WHEEL}; \
+    fi

 # #######################################################
 # Environment 1: serve (sglang + kt-kernel)
@@ -318,61 +325,78 @@ RUN . /opt/miniconda3/etc/profile.d/conda.sh && conda activate serve \
     && CPUINFER_BUILD_ALL_VARIANTS=1 ./install.sh build

 # #######################################################
-# Environment 2: fine-tune (LLaMA-Factory + ktransformers)
+# Environment 2: fine-tune (LLaMA-Factory + ktransformers) - sft mode only
 # #######################################################

 # Install dependency libraries for ktransformers (CUDA 11.8 runtime required)
-RUN conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
-    && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        conda install -n fine-tune -y -c conda-forge libstdcxx-ng gcc_impl_linux-64 \
+        && conda install -n fine-tune -y -c nvidia/label/cuda-11.8.0 cuda-runtime; \
+    fi

 # Install PyTorch 2.8 in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    case "$CUDA_VERSION" in \
-        12.6.1) CUINDEX=126 ;; \
-        12.8.1) CUINDEX=128 ;; \
-        12.9.1) CUINDEX=129 ;; \
-        13.0.1) CUINDEX=130 ;; \
-    esac \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install \
-        torch==2.8.0 \
-        torchvision \
-        torchaudio \
-        --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        case "$CUDA_VERSION" in \
+            12.6.1) CUINDEX=126 ;; \
+            12.8.1) CUINDEX=128 ;; \
+            12.9.1) CUINDEX=129 ;; \
+            13.0.1) CUINDEX=130 ;; \
+        esac \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install --upgrade pip setuptools wheel hatchling \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install \
+            torch==2.8.0 \
+            torchvision \
+            torchaudio \
+            --extra-index-url https://download.pytorch.org/whl/cu${CUINDEX}; \
+    fi

 # Install LLaMA-Factory in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    cd /workspace/LLaMA-Factory \
-    && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        cd /workspace/LLaMA-Factory \
+        && /opt/miniconda3/envs/fine-tune/bin/pip install -e ".[torch,metrics]" --no-build-isolation; \
+    fi

 # Install ktransformers wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${KTRANSFORMERS_WHEEL}; \
+    fi

 # Install flash_attn wheel in fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        /opt/miniconda3/envs/fine-tune/bin/pip install /workspace/${FLASH_ATTN_WHEEL}; \
+    fi

 # Install NCCL for fine-tune env
 RUN --mount=type=cache,target=/root/.cache/pip \
-    if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
-    elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
-        /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        if [ "${CUDA_VERSION%%.*}" = "12" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu12==2.28.3 --force-reinstall --no-deps ; \
+        elif [ "${CUDA_VERSION%%.*}" = "13" ]; then \
+            /opt/miniconda3/envs/fine-tune/bin/pip install nvidia-nccl-cu13==2.28.3 --force-reinstall --no-deps ; \
+        fi; \
     fi

 # #######################################################
 # Cleanup and final setup
 # #######################################################

 # Clean up downloaded wheels
-RUN rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}
+RUN if [ "$FUNCTIONALITY" = "sft" ]; then \
+        rm -f /workspace/${KTRANSFORMERS_WHEEL} /workspace/${FLASH_ATTN_WHEEL}; \
+    fi

 # Initialize conda for bash
 RUN /opt/miniconda3/bin/conda init bash

 # Create shell aliases for convenience
-RUN echo '\n # Conda environment aliases\n alias serve="conda activate serve"\n alias finetune="conda activate fine-tune"' >> /root/.bashrc
+RUN echo '\n # Conda environment aliases\n alias serve="conda activate serve"' >> /root/.bashrc \
+    && if [ "$FUNCTIONALITY" = "sft" ]; then \
+        echo 'alias finetune="conda activate fine-tune"' >> /root/.bashrc; \
+    fi

 # #######################################################
 # Extract version information for image naming
@@ -392,12 +416,17 @@ RUN set -x && \
     echo "KTRANSFORMERS_VERSION=$KTRANSFORMERS_VERSION" >> /workspace/versions.env && \
     echo "Extracted KTransformers version: $KTRANSFORMERS_VERSION" && \
     \
-    # LLaMA-Factory version (from fine-tune environment)
-    . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
-    cd /workspace/LLaMA-Factory && \
-    LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
-    echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
-    echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" && \
+    # LLaMA-Factory version (from fine-tune environment, sft mode only)
+    if [ "$FUNCTIONALITY" = "sft" ]; then \
+        . /opt/miniconda3/etc/profile.d/conda.sh && conda activate fine-tune && \
+        cd /workspace/LLaMA-Factory && \
+        LLAMAFACTORY_VERSION=$(python -c "import sys; sys.path.insert(0, 'src'); from llamafactory import __version__; print(__version__)" 2>/dev/null || echo "unknown") && \
+        echo "LLAMAFACTORY_VERSION=$LLAMAFACTORY_VERSION" >> /workspace/versions.env && \
+        echo "Extracted LLaMA-Factory version: $LLAMAFACTORY_VERSION" ; \
+    else \
+        echo "LLAMAFACTORY_VERSION=none" >> /workspace/versions.env && \
+        echo "LLaMA-Factory not installed (infer mode)" ; \
+    fi && \
     \
     # Display all versions
     echo "=== Version Summary ===" && \
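A minimal sketch of how the new FUNCTIONALITY build argument could be invoked. The image tags are hypothetical, and the other required build args (GITHUB_ARTIFACTORY, KTRANSFORMERS_VERSION, KTRANSFORMERS_WHEEL, FLASH_ATTN_WHEEL) are omitted for brevity:

# Default build (FUNCTIONALITY=sft): creates both the serve and fine-tune conda envs
docker build --build-arg FUNCTIONALITY=sft -t kt-sft .

# Inference-only build: skips the fine-tune env, LLaMA-Factory, and the wheel downloads
docker build --build-arg FUNCTIONALITY=infer -t kt-infer .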