axolotl-ai-cloud · winglian · Jan 22, 2026 · Jan 6, 2026 · Jan 21, 2026 · Jan 21, 2026
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -38,7 +38,7 @@ jobs:
             cuda_version: 12.9.1
             python_version: "3.12"
             pytorch: 2.9.1
-            axolotl_extras:
+            axolotl_extras: vllm
             platforms: "linux/amd64,linux/arm64"
           - cuda: 130
             cuda_version: 13.0.0

diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
@@ -35,22 +35,26 @@ jobs:
             pytorch: 2.8.0
             axolotl_extras: fbgemm-gpu
             num_gpus: 2
-            nightly_build: "true"
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
             pytorch: 2.9.1
-            axolotl_extras: fbgemm-gpu
+            axolotl_extras: "fbgemm-gpu"
+            num_gpus: 2
+          - cuda: 129
+            cuda_version: 12.9.1
+            python_version: "3.12"
+            pytorch: 2.9.1
+            axolotl_extras: "fbgemm-gpu,vllm"
             num_gpus: 2
-            nightly_build: "true"
+            dockerfile: "Dockerfile-uv.jinja"
           - cuda: 130
             cuda_version: 13.0.0
             python_version: "3.11"
             pytorch: 2.9.1
             axolotl_extras:
 #            axolotl_extras: fbgemm-gpu
             num_gpus: 2
-            nightly_build: "true"
     runs-on: [self-hosted, modal]
     timeout-minutes: 120
     steps:
@@ -72,8 +76,8 @@ jobs:
           echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
           echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
           echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
-          echo "NIGHTLY_BUILD=${{ matrix.nightly_build }}" >> $GITHUB_ENV
           echo "CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }}" >> $GITHUB_ENV
+          echo "E2E_DOCKERFILE=${{ matrix.dockerfile || 'Dockerfile.jinja'}}" >> $GITHUB_ENV
       - name: Run tests job on Modal
         run: |
           modal run -m cicd.multigpu
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
@@ -40,7 +40,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          pip3 install wheel packaging==23.2
+          pip3 install wheel packaging==26.0
           pip3 install --no-build-isolation -e .
           pip3 install -r requirements-dev.txt -r requirements-tests.txt
 

diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
@@ -48,7 +48,7 @@ jobs:
       - name: upgrade pip
         run: |
           pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
+          pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel
 
       - name: Install PyTorch
         run: |

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -87,7 +87,7 @@ jobs:
       - name: upgrade pip
         run: |
           pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 wheel
+          pip3 install --upgrade packaging==26.0 setuptools==75.8.0 wheel
 
       - name: Install PyTorch
         run: |
@@ -182,7 +182,7 @@ jobs:
       - name: upgrade pip
         run: |
           pip3 install --upgrade pip
-          pip3 install --upgrade packaging==23.2 setuptools==75.8.0 setuptools_scm build wheel psutil
+          pip3 install --upgrade packaging==26.0 setuptools==75.8.0 setuptools_scm build wheel psutil
 
       - name: Install PyTorch
         run: |
@@ -269,7 +269,7 @@ jobs:
             python_version: "3.12"
             pytorch: 2.9.1
             num_gpus: 1
-            axolotl_extras:
+            axolotl_extras: vllm
             dockerfile: "Dockerfile-uv.jinja"
     steps:
       - name: Checkout

diff --git a/README.md b/README.md
@@ -88,7 +88,7 @@ Features:
 #### Using pip
 
 ```bash
-pip3 install -U packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install -U packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation axolotl[flash-attn,deepspeed]
 
 # Download example axolotl configs, deepspeed configs

diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja
@@ -31,7 +31,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
         sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
     fi
 
-RUN uv pip install packaging==23.2 setuptools==75.8.0
+RUN uv pip install packaging==26.0 setuptools==75.8.0
 RUN uv pip install torchvision
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
         uv pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \

diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja
@@ -32,7 +32,7 @@ RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \
         sed -i 's#^datasets.*#datasets @ git+https://github.com/huggingface/datasets.git@main#' requirements.txt; \
     fi
 
-RUN pip install packaging==23.2 setuptools==75.8.0 psutil
+RUN pip install packaging==26.0 setuptools==75.8.0 psutil
 RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
         pip install --no-build-isolation -e .[deepspeed,flash-attn,ring-flash-attn,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \
     else \

diff --git a/cicd/multigpu.py b/cicd/multigpu.py
@@ -17,7 +17,8 @@
 template_env = jinja2.Environment(
     loader=template_loader, autoescape=select_autoescape()
 )
-df_template = template_env.get_template("Dockerfile.jinja")
+dockerfile = os.environ.get("E2E_DOCKERFILE", "Dockerfile.jinja")
+df_template = template_env.get_template(dockerfile)
 
 df_args = {
     "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""),
@@ -27,8 +28,11 @@
     "CUDA": os.environ.get("CUDA", "126"),
     "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"),
     "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""),
+    "NIGHTLY_BUILD": os.environ.get("NIGHTLY_BUILD", ""),
     "CODECOV_TOKEN": os.environ.get("CODECOV_TOKEN", ""),
     "HF_HOME": "/workspace/data/huggingface-cache/hub",
+    "PYTHONUNBUFFERED": os.environ.get("PYTHONUNBUFFERED", "1"),
+    "DEEPSPEED_LOG_LEVEL": os.environ.get("DEEPSPEED_LOG_LEVEL", "WARNING"),
 }
 
 dockerfile_contents = df_template.render(**df_args)

diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
@@ -43,7 +43,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
 
 WORKDIR /workspace
 
-RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel psutil && \
+RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel psutil && \
     python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \
     python3 -m pip cache purge
 

diff --git a/docker/Dockerfile-base-nightly b/docker/Dockerfile-base-nightly
@@ -30,7 +30,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
 
 WORKDIR /workspace
 
-RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==23.2 setuptools==75.8.0 wheel && \
+RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel && \
     python3 -m pip install --no-cache-dir -U torch --extra-index-url https://download.pytorch.org/whl/nightly/cu$CUDA && \
     python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
     python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" && \

diff --git a/examples/apertus/README.md b/examples/apertus/README.md
@@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
 
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation -e '.[flash-attn]'
 
 # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy

diff --git a/examples/arcee/README.md b/examples/arcee/README.md
@@ -17,7 +17,7 @@ Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the A
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
 
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation -e '.[flash-attn]'
 
 # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy

diff --git a/examples/devstral/README.md b/examples/devstral/README.md
@@ -16,7 +16,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
 
 ```bash
 # Ensure you have Pytorch installed (Pytorch 2.6.0 min)
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
 ```
 

diff --git a/examples/gemma3n/README.md b/examples/gemma3n/README.md
@@ -10,7 +10,7 @@ Gemma-3n is a family of multimodal models from Google found on [HuggingFace](htt
 
 ```bash
 # Ensure you have Pytorch installed (Pytorch 2.6.0 min)
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
 ```
 

diff --git a/examples/gpt-oss/README.md b/examples/gpt-oss/README.md
@@ -14,7 +14,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
 
 ```bash
 # Ensure you have Pytorch installed (Pytorch 2.6.0 min)
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
 ```
 

diff --git a/examples/granite4/README.md b/examples/granite4/README.md
@@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
 
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation -e '.[flash-attn]'
 
 # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy

diff --git a/examples/hunyuan/README.md b/examples/hunyuan/README.md
@@ -13,7 +13,7 @@ Tencent released a family of opensource models called HunYuan with varying param
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
 
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation -e '.[flash-attn]'
 
 # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy

diff --git a/examples/magistral/README.md b/examples/magistral/README.md
@@ -14,7 +14,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for these
 
 ```bash
 # Ensure you have Pytorch installed (Pytorch 2.7.0 min)
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
 ```
 

diff --git a/examples/qwen3-next/README.md b/examples/qwen3-next/README.md
@@ -15,7 +15,7 @@ This guide shows how to fine-tune it with Axolotl with multi-turn conversations
 git clone https://github.com/axolotl-ai-cloud/axolotl.git
 cd axolotl
 
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation -e '.[flash-attn]'
 
 # Install CCE https://docs.axolotl.ai/docs/custom_integrations.html#cut-cross-entropy

diff --git a/examples/voxtral/README.md b/examples/voxtral/README.md
@@ -12,7 +12,7 @@ Thanks to the team at MistralAI for giving us early access to prepare for this r
 
 ```bash
 # Ensure you have Pytorch installed (Pytorch 2.6.0 min)
-pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install packaging==26.0 setuptools==75.8.0 wheel ninja
 pip3 install --no-build-isolation 'axolotl[flash-attn]>=0.12.0'
 ```
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==23.2"]
+requires = ["setuptools>=64", "wheel", "setuptools_scm>=8", "packaging==26.0"]
 build-backend = "setuptools.build_meta"
 
 [project]

diff --git a/requirements.txt b/requirements.txt
@@ -8,7 +8,7 @@ xformers>=0.0.23.post1
 liger-kernel==0.6.4
 # END section
 
-packaging==23.2
+packaging==26.0
 
 huggingface_hub>=0.36.0
 peft>=0.18.1
@@ -72,4 +72,4 @@ axolotl-contribs-mit==0.0.6
 # telemetry
 posthog==6.7.11
 
-mistral-common==1.8.6
+mistral-common==1.8.8
diff --git a/setup.py b/setup.py
@@ -78,6 +78,10 @@ def parse_requirements(extras_require_map):
-                extras_require_map["vllm"] = ["vllm==0.11.1"]
                 if not install_xformers:
                     _install_requires.pop(_install_requires.index(xformers_version))
+                # Pin vllm by `patch` (presumably the torch patch version - confirm): 0 -> 0.13.0, else 0.14.0.
+                if patch == 0:
+                    extras_require_map["vllm"] = ["vllm==0.13.0"]
+                else:
+                    extras_require_map["vllm"] = ["vllm==0.14.0"]
             elif (major, minor) >= (2, 8):
                 extras_require_map.pop("fbgemm-gpu")
                 extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.3.0"]