From 8499149029b5c4bf6de407c682f8a70703335a58 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:23:07 +0100 Subject: [PATCH 01/13] ci: bump GitHub Actions images to Ubuntu 24.04 Update GitHub Actions workflows to use ubuntu:24.04 where applicable. --- .github/workflows/backend.yml | 324 ++++++++++++-------- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 10 +- .github/workflows/image.yml | 12 +- 5 files changed, 205 insertions(+), 145 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index a18068c754f5..2dd8871fcecf 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -52,7 +52,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -65,7 +65,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,7 +78,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -91,7 +91,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -104,7 +104,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -112,12 +112,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-diffusers' runs-on: 'ubuntu-latest' - 
base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -125,12 +125,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 11 additional backends - build-type: 'cublas' cuda-major-version: "11" @@ -144,7 +144,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -157,7 +157,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -170,7 +170,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -183,7 +183,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -196,7 +196,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -205,12 +205,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -218,12 +218,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -231,12 +231,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -244,12 +244,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -257,12 +257,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -270,12 +270,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -283,12 +283,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -296,12 +296,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -309,12 +309,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -322,12 +322,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -340,7 +340,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - 
build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -353,7 +353,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -366,7 +366,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -379,7 +379,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -392,7 +392,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' cuda-major-version: "13" @@ -406,7 +406,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -419,7 +419,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -445,7 +445,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -458,7 +458,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -484,7 +484,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" 
- ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -497,7 +497,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -510,7 +510,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -523,7 +523,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -536,7 +536,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -562,7 +562,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -588,7 +588,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -597,12 +597,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -610,12 +610,12 @@ jobs: tag-latest: 'auto' tag-suffix: 
'-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -623,12 +623,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -636,12 +636,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -649,12 +649,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' cuda-major-version: "" @@ -663,12 +663,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: 
'2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -676,12 +676,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -689,12 +689,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -702,12 +702,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # sycl builds - build-type: 'intel' cuda-major-version: "" @@ -721,7 +721,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -734,7 +734,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -747,7 +747,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: 
"" cuda-minor-version: "" @@ -760,7 +760,7 @@ jobs: backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -773,7 +773,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -786,7 +786,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -799,7 +799,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' cuda-major-version: "" @@ -813,7 +813,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -826,7 +826,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -839,7 +839,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -852,7 +852,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # piper - build-type: '' cuda-major-version: "" @@ -861,12 +861,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' 
backend: "piper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # bark-cpp - build-type: '' cuda-major-version: "" @@ -875,12 +875,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -888,12 +888,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -906,7 +906,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -914,12 +914,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # Stablediffusion-ggml - build-type: '' cuda-major-version: "" @@ -928,12 +928,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: 
'-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -946,7 +958,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -959,7 +971,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -972,7 +984,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -980,12 +992,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -998,7 +1010,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # whisper - build-type: '' cuda-major-version: "" @@ -1007,12 +1019,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" 
+ platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1025,7 +1049,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1038,7 +1062,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1051,7 +1075,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1059,12 +1083,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1077,20 +1101,20 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisper' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' #silero-vad - build-type: '' 
cuda-major-version: "" @@ -1099,12 +1123,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-silero-vad' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "silero-vad" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # local-store - build-type: '' cuda-major-version: "" @@ -1113,12 +1137,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-local-store' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "local-store" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # huggingface - build-type: '' cuda-major-version: "" @@ -1127,12 +1151,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-huggingface' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "huggingface" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # rfdetr - build-type: '' cuda-major-version: "" @@ -1141,12 +1165,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1159,7 +1195,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - 
build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1172,7 +1208,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1185,7 +1221,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # exllama2 - build-type: '' cuda-major-version: "" @@ -1194,12 +1230,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1212,7 +1260,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1225,7 +1273,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1233,12 +1281,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - 
ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1251,7 +1299,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' # cuda-major-version: "" @@ -1259,7 +1307,7 @@ jobs: # platforms: 'linux/amd64' # tag-latest: 'auto' # tag-suffix: '-gpu-hipblas-rfdetr' - # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # base-image: "rocm/dev-ubuntu-24.04:6.4.4" # runs-on: 'ubuntu-latest' # skip-drivers: 'false' # backend: "rfdetr" @@ -1273,12 +1321,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-kitten-tts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # neutts - build-type: '' cuda-major-version: "" @@ -1287,12 +1335,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1300,12 +1360,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-neutts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "neutts" dockerfile: 
"./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1318,7 +1378,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml strategy: diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index feadf0948bdc..72a2b306741d 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -16,7 +16,7 @@ jobs: strategy: matrix: include: - - grpc-base-image: ubuntu:22.04 + - grpc-base-image: ubuntu:24.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 5c0160addb38..0dc47da211ec 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 runs-on: 'arc-runner-set' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 84ffa5a1320c..055f26036cfd 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -42,7 +42,7 @@ jobs: tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' @@ -59,8 +59,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: 
"rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' @@ -68,7 +68,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" @@ -78,7 +78,7 @@ jobs: tag-latest: 'false' tag-suffix: '-vulkan-core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7389760912c5..8c67434f9d1a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -40,8 +40,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" @@ -76,7 +76,7 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" @@ -101,7 +101,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" @@ -123,7 +123,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" @@ -132,7 +132,7 @@ jobs: platforms: 'linux/amd64' 
tag-latest: 'auto' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" From c17e27f03d48a701150edff7ba15f8c2ad0581ea Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:27:37 +0100 Subject: [PATCH 02/13] ci: removes CUDA 11.x support from GitHub Actions Removes CUDA 11 support from GitHub Actions workflows because it is no longer supported in Ubuntu 24.04. --- .github/workflows/backend.yml | 171 ---------------------------------- .github/workflows/image.yml | 12 --- 2 files changed, 183 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 2dd8871fcecf..875cfaf2d61d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -40,58 +40,6 @@ jobs: matrix: include: # CUDA 11 builds - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - 
context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -130,73 +78,6 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2404' - # CUDA 11 additional backends - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - 
backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -946,19 +827,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1037,19 +905,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1183,19 +1038,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' 
- backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1248,19 +1090,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8c67434f9d1a..ab69c98ac3c4 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -82,18 +82,6 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2204' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'false' - aio: "-aio-gpu-nvidia-cuda-11" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" From 8e80a19b9462608eeda486336dc8160a91dfd0b9 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:29:46 +0100 Subject: [PATCH 03/13] ci: bump GitHub Actions images CUDA support to 12.9 Update GitHub Actions workflows to use CUDA 12.9 where applicable. 
--- .github/workflows/backend.yml | 44 +++++++++++++++---------------- .github/workflows/image-pr.yml | 2 +- .github/workflows/image.yml | 4 +-- .github/workflows/image_build.yml | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 875cfaf2d61d..6e2da168e95d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -42,7 +42,7 @@ jobs: # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' @@ -81,7 +81,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' @@ -94,7 +94,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -107,7 +107,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -120,7 +120,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -133,7 +133,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -146,7 +146,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' 
tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -159,7 +159,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -172,7 +172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -185,7 +185,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -198,7 +198,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -670,7 +670,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' @@ -777,7 +777,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -817,7 +817,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -868,7 +868,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -895,7 +895,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: 
"12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -946,7 +946,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1028,7 +1028,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -1080,7 +1080,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -1118,7 +1118,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1172,7 +1172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -1197,7 +1197,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 055f26036cfd..2db9e5cbafa4 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -37,7 +37,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ab69c98ac3c4..ad8ce97bcd4d 
100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -84,7 +84,7 @@ jobs: ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -153,7 +153,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 31a1f2310ea4..39cfa1401052 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -23,7 +23,7 @@ on: type: string cuda-minor-version: description: 'CUDA minor version' - default: "4" + default: "9" type: string platforms: description: 'Platforms' From 56dae448c853bd37cc2de33b1c8d33cab123ba3b Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 15:00:22 +0100 Subject: [PATCH 04/13] docker: bump base image to ubuntu:24.04 and adjust packages Change base images to ubuntu:24.04, update Vulkan SDK and package names to match the Ubuntu 24.04 repositories. 
--- Dockerfile | 40 ++++++++++++++++++++++++++---------- Dockerfile.aio | 2 +- Makefile | 2 +- backend/Dockerfile.golang | 37 +++++++++++++++++++++++++-------- backend/Dockerfile.llama-cpp | 35 +++++++++++++++++++++++-------- backend/Dockerfile.python | 35 +++++++++++++++++++++++-------- docker-compose.yaml | 2 +- 7 files changed, 114 insertions(+), 39 deletions(-) diff --git a/Dockerfile b/Dockerfile index a253237c974c..8c20a702e65c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg && \ + ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -34,11 +34,30 @@ RUN < /run/localai/capability @@ -141,13 +160,12 @@ ENV PATH=/opt/rocm/bin:${PATH} # The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. 
FROM requirements-drivers AS build-requirements -ARG GO_VERSION=1.22.6 -ARG CMAKE_VERSION=3.26.4 +ARG GO_VERSION=1.25.4 +ARG CMAKE_VERSION=3.28.3 ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT - RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -206,7 +224,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Dockerfile.aio b/Dockerfile.aio index 81063bb4dbeb..ccc2fc94b9ed 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} diff --git a/Makefile b/Makefile index 1f855b02a673..450295fcc348 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ test-extra: prepare-test-extra DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core -BASE_IMAGE?=ubuntu:22.04 +BASE_IMAGE?=ubuntu:24.04 docker: docker build \ diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index dbfee61e2902..8467521c0fdf 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} AS builder ARG BACKEND=rerankers @@ -12,7 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG GO_VERSION=1.22.6 +ARG GO_VERSION=1.25.4 RUN apt-get update && 
\ apt-get install -y --no-install-recommends \ @@ -38,11 +38,30 @@ RUN < Date: Wed, 3 Dec 2025 19:39:44 +0100 Subject: [PATCH 05/13] backend: Fix context for Python backends Fix context path for all Python backends in GitHub Actions workflows, Makefile and Python backends Dockerfile. --- .github/workflows/backend.yml | 144 +++++++++++++++++----------------- Makefile | 12 +-- backend/Dockerfile.python | 6 +- 3 files changed, 81 insertions(+), 81 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 6e2da168e95d..e03913741e14 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -51,7 +51,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -64,7 +64,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -77,7 +77,7 @@ jobs: skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -90,7 +90,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -116,7 +116,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -129,7 +129,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -142,7 +142,7 @@ jobs: skip-drivers: 'false' backend:
"diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -155,7 +155,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -168,7 +168,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -181,7 +181,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -194,7 +194,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -207,7 +207,7 @@ jobs: skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -216,7 +216,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -229,7 +229,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -242,11 +242,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: 
"rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -255,11 +255,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -268,11 +268,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' @@ -282,11 +282,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -295,7 +295,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -321,11 +321,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -334,11 +334,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-diffusers' runs-on: 'ubuntu-latest' - 
base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" @@ -352,7 +352,7 @@ jobs: ubuntu-version: '2404' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -360,11 +360,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -373,11 +373,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -386,11 +386,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -399,11 +399,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -412,7 +412,7 @@ jobs: tag-latest: 'auto' tag-suffix: 
'-gpu-nvidia-cuda-13-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -438,7 +438,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -464,11 +464,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' @@ -482,7 +482,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -508,7 +508,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -521,7 +521,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -534,7 +534,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' @@ -548,7 +548,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -561,7 +561,7 @@ jobs: skip-drivers: 'false' backend: 
"faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -574,7 +574,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -587,7 +587,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # sycl builds - build-type: 'intel' @@ -601,7 +601,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" @@ -640,7 +640,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -653,7 +653,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -666,7 +666,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -679,7 +679,7 @@ jobs: skip-drivers: 'true' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' @@ -693,7 +693,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -706,7 +706,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -719,7 +719,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -732,7 +732,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # piper - build-type: '' @@ -1024,7 +1024,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1037,7 +1037,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1049,7 +1049,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1062,7 +1062,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # exllama2 - build-type: '' @@ -1076,7 +1076,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1089,7 +1089,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1101,7 +1101,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" 
ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -1114,7 +1114,7 @@ jobs: runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1127,7 +1127,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' @@ -1141,7 +1141,7 @@ jobs: # skip-drivers: 'false' # backend: "rfdetr" # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" + # context: "./" # kitten-tts - build-type: '' cuda-major-version: "" @@ -1154,7 +1154,7 @@ jobs: skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # neutts - build-type: '' @@ -1168,7 +1168,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1181,7 +1181,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1193,7 +1193,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1206,7 +1206,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml diff --git a/Makefile b/Makefile index 450295fcc348..2a8efe81c827 100644 --- a/Makefile +++ b/Makefile @@ -434,10 +434,10 @@ 
docker-build-huggingface: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . docker-build-rfdetr: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . docker-build-kitten-tts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . docker-save-kitten-tts: backend-images docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar @@ -446,13 +446,13 @@ docker-save-chatterbox: backend-images docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar docker-build-neutts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . 
docker-save-neutts: backend-images docker save local-ai-backend:neutts -o backend-images/neutts.tar docker-build-kokoro: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . docker-build-vllm: docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend @@ -500,7 +500,7 @@ docker-build-transformers: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . docker-build-diffusers: - docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers ./backend + docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . docker-save-diffusers: backend-images docker save local-ai-backend:diffusers -o backend-images/diffusers.tar @@ -521,7 +521,7 @@ docker-build-bark: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . 
docker-build-chatterbox: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . docker-build-exllama2: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 96e147fce40f..d9a3e1afadb6 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -159,9 +159,9 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y # Install grpcio-tools RUN pip install --break-system-packages --user grpcio-tools==1.71.0 grpcio==1.71.0 torch diffusers transformers compel optimum.quanto accelerate gguf -COPY python/${BACKEND} /${BACKEND} -COPY backend.proto /${BACKEND}/backend.proto -COPY python/common/ /${BACKEND}/common +COPY backend/python/${BACKEND} /${BACKEND} +COPY backend/backend.proto /${BACKEND}/backend.proto +COPY backend/python/common/ /${BACKEND}/common RUN cd /${BACKEND} && PORTABLE_PYTHON=true make From 0d75455cddfb0129cf01ff65cac594841dde562e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:41:55 +0100 Subject: [PATCH 06/13] make: disable parallel backend builds Add .NOTPARALLEL for backend build target to avoid concurrent build race conditions. 
--- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 2a8efe81c827..d1c448fd36e6 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Disable parallel execution for backend builds +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin + GOCMD=go GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet From 62c2d9fb1c380c5d4c76bbbee212a2491efc7ffa Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:46:47 +0100 Subject: [PATCH 07/13] make: export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION Export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION so CI/users can override the CUDA version used during building. --- Makefile | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index d1c448fd36e6..960061866c4e 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ CUDA_MINOR_VERSION?=0 GORELEASER?= export BUILD_TYPE?= +export CUDA_MAJOR_VERSION?=12 +export CUDA_MINOR_VERSION?=9 GO_TAGS?= BUILD_ID?= @@ -180,7 +182,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests . 
+ docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) @@ -311,16 +313,16 @@ docker: --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . -docker-cuda11: +docker-cuda12: docker build \ - --build-arg CUDA_MAJOR_VERSION=11 \ - --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \ + --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ - -t $(DOCKER_IMAGE)-cuda-11 . + -t $(DOCKER_IMAGE)-cuda-12 . docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" @@ -422,25 +424,25 @@ backend-images: mkdir -p backend-images docker-build-llama-cpp: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp . docker-build-bark-cpp: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . 
docker-build-piper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper . docker-build-local-store: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store . docker-build-huggingface: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . docker-build-rfdetr: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . 
docker-build-kitten-tts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . docker-save-kitten-tts: backend-images docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar @@ -449,13 +451,13 @@ docker-save-chatterbox: backend-images docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar docker-build-neutts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . docker-save-neutts: backend-images docker save local-ai-backend:neutts -o backend-images/neutts.tar docker-build-kokoro: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . 
docker-build-vllm: docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend @@ -476,7 +478,7 @@ docker-save-local-store: backend-images docker save local-ai-backend:local-store -o backend-images/local-store.tar docker-build-silero-vad: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad . docker-save-silero-vad: backend-images docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar @@ -491,43 +493,46 @@ docker-save-bark-cpp: backend-images docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar docker-build-stablediffusion-ggml: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml . 
docker-save-stablediffusion-ggml: backend-images docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar docker-build-rerankers: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers . + +docker-build-vllm: + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm . docker-build-transformers: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . docker-build-diffusers: - docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . + docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . 
docker-save-diffusers: backend-images docker save local-ai-backend:diffusers -o backend-images/diffusers.tar docker-build-whisper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper . docker-save-whisper: backend-images docker save local-ai-backend:whisper -o backend-images/whisper.tar docker-build-faster-whisper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . docker-build-coqui: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . docker-build-bark: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . 
+ docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . docker-build-chatterbox: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . docker-build-exllama2: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . 
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2 From b466fcbe2b4e1950ddf0c27b1d3f7c05441d1082 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:48:25 +0100 Subject: [PATCH 08/13] make: add backends/faster-whisper and docker-save-faster-whisper targets --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 960061866c4e..30aa9a14bac3 100644 --- a/Makefile +++ b/Makefile @@ -363,6 +363,9 @@ backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-sta backends/whisper: docker-build-whisper docker-save-whisper build ./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)" +backends/faster-whisper: docker-build-faster-whisper docker-save-faster-whisper build + ./local-ai backends install "ocifile://$(abspath ./backend-images/faster-whisper.tar)" + backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)" @@ -522,6 +525,9 @@ docker-save-whisper: backend-images docker-build-faster-whisper: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . 
+docker-save-faster-whisper: + docker save local-ai-backend:faster-whisper -o backend-images/faster-whisper.tar + docker-build-coqui: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . From 89eb4fd4349aab1ef3cd65d7ec552a67a3d6d3c6 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:51:57 +0100 Subject: [PATCH 09/13] backend: update backend Dockerfiles for Ubuntu 24.04 Align backend Dockerfiles to build with Ubuntu 24.04 base image. --- backend/Dockerfile.golang | 2 ++ backend/Dockerfile.python | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 8467521c0fdf..93c2b2ccaa47 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -142,6 +142,8 @@ EOT COPY . /LocalAI +RUN git config --global --add safe.directory /LocalAI + RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build FROM scratch diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index d9a3e1afadb6..afd88efafaf8 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -32,7 +32,7 @@ RUN apt-get update && \ python3-venv make cmake && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip + pip install --break-system-packages --user --upgrade pip # Cuda From 612010685b2ca4c32351e5fc613922c70d04f8d3 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:55:31 +0100 Subject: [PATCH 10/13] backend: add ROCm env vars and default AMDGPU_TARGETS for hipblas build in stablediffusion-ggml Provide ROCM_HOME/ROCM_PATH and export CC/CXX to make possible to build stablediffusion-ggml with ROCm libraries. 
--- backend/go/stablediffusion-ggml/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index c1c22680b001..0dd7cb88a344 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -28,7 +28,12 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON else ifeq ($(OS),Darwin) From fd7592b1a432cfd858254b1ef0b06fa69bd835c5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:56:06 +0100 Subject: [PATCH 11/13] chatterbox: bump ROCm PyTorch to 2.9.1+rocm6.4 and update index url Align chatterbox hipblas requirements with ROCm 6.4-compatible wheels. 
--- backend/python/chatterbox/requirements-hipblas.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index 6c21992a7585..ed30fb824107 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.6.0+rocm6.1 -torchaudio==2.6.0+rocm6.1 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.9.1+rocm6.4 +torchaudio==2.9.1+rocm6.4 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 From 58026e773f7de5ceb6338773e919d77f5ddb6c78 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 20:36:16 +0100 Subject: [PATCH 12/13] misc: add local-ai-launcher to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index caae10a218a1..2ee2ab8588b1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ go-bert # LocalAI build binary LocalAI /local-ai +/local-ai-launcher # prevent above rules from omitting the helm chart !charts/* # prevent above rules from omitting the api/localai folder From 3ed9d65a081dff30589044716cc4c1bea7add115 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 21:27:29 +0100 Subject: [PATCH 13/13] ci: fix GitHub Actions backends workflows after rebase --- .github/workflows/backend.yml | 80 +++++------------------------------ 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index e03913741e14..c6ec48315d27 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -39,7 +39,6 @@ jobs: #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} matrix: include: - # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" 
cuda-minor-version: "9" @@ -64,7 +63,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./"" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -78,6 +77,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./" + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -211,7 +211,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -224,7 +224,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -237,7 +237,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -250,7 +250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -263,7 +263,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -670,7 +670,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' @@ -815,18 +815,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - 
cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -893,18 +881,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1026,18 +1002,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1078,18 +1042,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1118,7 +1070,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - 
cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1170,18 +1122,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1197,7 +1137,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto'