From 8499149029b5c4bf6de407c682f8a70703335a58 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:23:07 +0100 Subject: [PATCH 01/13] ci: bump GitHub Actions images to Ubuntu 24.04 Update GitHub Actions workflows to use ubuntu:24.04 where applicable. --- .github/workflows/backend.yml | 324 ++++++++++++-------- .github/workflows/generate_grpc_cache.yaml | 2 +- .github/workflows/generate_intel_image.yaml | 2 +- .github/workflows/image-pr.yml | 10 +- .github/workflows/image.yml | 12 +- 5 files changed, 205 insertions(+), 145 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index a18068c754f5..2dd8871fcecf 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -52,7 +52,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -65,7 +65,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -78,7 +78,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -91,7 +91,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -104,7 +104,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -112,12 +112,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-diffusers' runs-on: 'ubuntu-latest' - 
base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -125,12 +125,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 11 additional backends - build-type: 'cublas' cuda-major-version: "11" @@ -144,7 +144,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -157,7 +157,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -170,7 +170,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -183,7 +183,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -196,7 +196,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -205,12 +205,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -218,12 +218,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -231,12 +231,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' runs-on: 'arc-runner-set' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -244,12 +244,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -257,12 +257,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -270,12 +270,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" 
skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -283,12 +283,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -296,12 +296,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -309,12 +309,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -322,12 +322,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -340,7 +340,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - 
build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -353,7 +353,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -366,7 +366,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -379,7 +379,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -392,7 +392,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' cuda-major-version: "13" @@ -406,7 +406,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -419,7 +419,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -445,7 +445,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -458,7 +458,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" cuda-minor-version: "0" @@ -484,7 +484,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" 
- ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -497,7 +497,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -510,7 +510,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -523,7 +523,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -536,7 +536,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -562,7 +562,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -588,7 +588,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' cuda-major-version: "" @@ -597,12 +597,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-rerankers' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -610,12 +610,12 @@ jobs: tag-latest: 'auto' tag-suffix: 
'-gpu-rocm-hipblas-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -623,12 +623,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-vllm' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -636,12 +636,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-transformers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -649,12 +649,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-diffusers' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' cuda-major-version: "" @@ -663,12 +663,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-kokoro' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: 
'2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -676,12 +676,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -689,12 +689,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-coqui' runs-on: 'ubuntu-latest' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -702,12 +702,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-bark' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # sycl builds - build-type: 'intel' cuda-major-version: "" @@ -721,7 +721,7 @@ jobs: backend: "rerankers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -734,7 +734,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -747,7 +747,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: 
"" cuda-minor-version: "" @@ -760,7 +760,7 @@ jobs: backend: "vllm" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -773,7 +773,7 @@ jobs: backend: "transformers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -786,7 +786,7 @@ jobs: backend: "diffusers" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -799,7 +799,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' cuda-major-version: "" @@ -813,7 +813,7 @@ jobs: backend: "kokoro" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -826,7 +826,7 @@ jobs: backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -839,7 +839,7 @@ jobs: backend: "coqui" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -852,7 +852,7 @@ jobs: backend: "bark" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # piper - build-type: '' cuda-major-version: "" @@ -861,12 +861,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-piper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' 
backend: "piper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # bark-cpp - build-type: '' cuda-major-version: "" @@ -875,12 +875,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-bark-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark-cpp" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: '' cuda-major-version: "" cuda-minor-version: "" @@ -888,12 +888,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -906,7 +906,7 @@ jobs: backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -914,12 +914,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # Stablediffusion-ggml - build-type: '' cuda-major-version: "" @@ -928,12 +928,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "stablediffusion-ggml" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: 
'-gpu-nvidia-cuda-12-stablediffusion-ggml' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -946,7 +958,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -959,7 +971,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -972,7 +984,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -980,12 +992,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -998,7 +1010,7 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # whisper - build-type: '' cuda-major-version: "" @@ -1007,12 +1019,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "whisper" + dockerfile: "./backend/Dockerfile.golang" + context: "./" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" 
+ platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-whisper' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1025,7 +1049,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1038,7 +1062,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'sycl_f16' cuda-major-version: "" cuda-minor-version: "" @@ -1051,7 +1075,7 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'vulkan' cuda-major-version: "" cuda-minor-version: "" @@ -1059,12 +1083,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" @@ -1077,20 +1101,20 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-whisper' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' #silero-vad - build-type: '' 
cuda-major-version: "" @@ -1099,12 +1123,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-silero-vad' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "silero-vad" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # local-store - build-type: '' cuda-major-version: "" @@ -1113,12 +1137,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-local-store' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "local-store" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # huggingface - build-type: '' cuda-major-version: "" @@ -1127,12 +1151,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-huggingface' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "huggingface" dockerfile: "./backend/Dockerfile.golang" context: "./" - ubuntu-version: '2204' + ubuntu-version: '2404' # rfdetr - build-type: '' cuda-major-version: "" @@ -1141,12 +1165,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "rfdetr" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1159,7 +1195,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - 
build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1172,7 +1208,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1185,7 +1221,7 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # exllama2 - build-type: '' cuda-major-version: "" @@ -1194,12 +1230,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "exllama2" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-exllama2' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" @@ -1212,7 +1260,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1225,7 +1273,7 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1233,12 +1281,12 @@ jobs: skip-drivers: 'true' tag-latest: 'auto' tag-suffix: '-gpu-hipblas-exllama2' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - 
ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1251,7 +1299,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' # cuda-major-version: "" @@ -1259,7 +1307,7 @@ jobs: # platforms: 'linux/amd64' # tag-latest: 'auto' # tag-suffix: '-gpu-hipblas-rfdetr' - # base-image: "rocm/dev-ubuntu-22.04:6.4.3" + # base-image: "rocm/dev-ubuntu-24.04:6.4.4" # runs-on: 'ubuntu-latest' # skip-drivers: 'false' # backend: "rfdetr" @@ -1273,12 +1321,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-kitten-tts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' # neutts - build-type: '' cuda-major-version: "" @@ -1287,12 +1335,24 @@ jobs: tag-latest: 'auto' tag-suffix: '-cpu-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" + skip-drivers: 'false' + backend: "neutts" + dockerfile: "./backend/Dockerfile.python" + context: "./backend" + ubuntu-version: '2404' + - build-type: 'cublas' + cuda-major-version: "12" + cuda-minor-version: "0" + platforms: 'linux/amd64' + tag-latest: 'auto' + tag-suffix: '-gpu-nvidia-cuda-12-neutts' + runs-on: 'ubuntu-latest' + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1300,12 +1360,12 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-rocm-hipblas-neutts' runs-on: 'arc-runner-set' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" skip-drivers: 'false' backend: "neutts" dockerfile: 
"./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -1318,7 +1378,7 @@ jobs: backend: "neutts" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2204' + ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml strategy: diff --git a/.github/workflows/generate_grpc_cache.yaml b/.github/workflows/generate_grpc_cache.yaml index feadf0948bdc..72a2b306741d 100644 --- a/.github/workflows/generate_grpc_cache.yaml +++ b/.github/workflows/generate_grpc_cache.yaml @@ -16,7 +16,7 @@ jobs: strategy: matrix: include: - - grpc-base-image: ubuntu:22.04 + - grpc-base-image: ubuntu:24.04 runs-on: 'ubuntu-latest' platforms: 'linux/amd64,linux/arm64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/generate_intel_image.yaml b/.github/workflows/generate_intel_image.yaml index 5c0160addb38..0dc47da211ec 100644 --- a/.github/workflows/generate_intel_image.yaml +++ b/.github/workflows/generate_intel_image.yaml @@ -15,7 +15,7 @@ jobs: strategy: matrix: include: - - base-image: intel/oneapi-basekit:2025.2.0-0-devel-ubuntu22.04 + - base-image: intel/oneapi-basekit:2025.3.0-0-devel-ubuntu24.04 runs-on: 'arc-runner-set' platforms: 'linux/amd64' runs-on: ${{matrix.runs-on}} diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 84ffa5a1320c..055f26036cfd 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -42,7 +42,7 @@ jobs: tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' @@ -59,8 +59,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: 
"rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" ubuntu-version: '2204' @@ -68,7 +68,7 @@ jobs: platforms: 'linux/amd64' tag-latest: 'false' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: 'sycl' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" @@ -78,7 +78,7 @@ jobs: tag-latest: 'false' tag-suffix: '-vulkan-core' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" makeflags: "--jobs=4 --output-sync=target" ubuntu-version: '2204' - build-type: 'cublas' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 7389760912c5..8c67434f9d1a 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -40,8 +40,8 @@ jobs: platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-hipblas' - base-image: "rocm/dev-ubuntu-22.04:6.4.3" - grpc-base-image: "ubuntu:22.04" + base-image: "rocm/dev-ubuntu-24.04:6.4.4" + grpc-base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" aio: "-aio-gpu-hipblas" @@ -76,7 +76,7 @@ jobs: platforms: 'linux/amd64,linux/arm64' tag-latest: 'auto' tag-suffix: '' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" runs-on: 'ubuntu-latest' aio: "-aio-cpu" makeflags: "--jobs=4 --output-sync=target" @@ -101,7 +101,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-nvidia-cuda-12" @@ -123,7 +123,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-vulkan' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' makeflags: "--jobs=4 --output-sync=target" aio: "-aio-gpu-vulkan" @@ -132,7 +132,7 @@ jobs: platforms: 'linux/amd64' 
tag-latest: 'auto' base-image: "quay.io/go-skynet/intel-oneapi-base:latest" - grpc-base-image: "ubuntu:22.04" + grpc-base-image: "ubuntu:24.04" tag-suffix: '-gpu-intel' runs-on: 'ubuntu-latest' makeflags: "--jobs=3 --output-sync=target" From c17e27f03d48a701150edff7ba15f8c2ad0581ea Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:27:37 +0100 Subject: [PATCH 02/13] ci: removes CUDA 11.x support from GitHub Actions Removes CUDA 11 support from GitHub Actions workflows because it is no longer supported in Ubuntu 24.04. --- .github/workflows/backend.yml | 171 ---------------------------------- .github/workflows/image.yml | 12 --- 2 files changed, 183 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 2dd8871fcecf..875cfaf2d61d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -40,58 +40,6 @@ jobs: matrix: include: # CUDA 11 builds - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rerankers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "rerankers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-llama-cpp' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "llama-cpp" - dockerfile: "./backend/Dockerfile.llama-cpp" - context: "./" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-transformers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "transformers" - dockerfile: "./backend/Dockerfile.python" - 
context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-diffusers' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "diffusers" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" cuda-minor-version: "0" @@ -130,73 +78,6 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./backend" - ubuntu-version: '2404' - # CUDA 11 additional backends - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-kokoro' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "kokoro" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-faster-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "faster-whisper" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-coqui' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "coqui" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-bark' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - 
backend: "bark" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-chatterbox' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "chatterbox" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -946,19 +827,6 @@ jobs: backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1037,19 +905,6 @@ jobs: backend: "whisper" dockerfile: "./backend/Dockerfile.golang" context: "./" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1183,19 +1038,6 @@ jobs: backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' 
- backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1248,19 +1090,6 @@ jobs: backend: "exllama2" dockerfile: "./backend/Dockerfile.python" context: "./backend" - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./backend" - ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 8c67434f9d1a..ab69c98ac3c4 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -82,18 +82,6 @@ jobs: makeflags: "--jobs=4 --output-sync=target" skip-drivers: 'false' ubuntu-version: '2204' - - build-type: 'cublas' - cuda-major-version: "11" - cuda-minor-version: "7" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-11' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" - makeflags: "--jobs=4 --output-sync=target" - skip-drivers: 'false' - aio: "-aio-gpu-nvidia-cuda-11" - ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "0" From 8e80a19b9462608eeda486336dc8160a91dfd0b9 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 13:29:46 +0100 Subject: [PATCH 03/13] ci: bump GitHub Actions images CUDA support to 12.9 Update GitHub Actions workflows to use CUDA 12.9 where applicable. 
--- .github/workflows/backend.yml | 44 +++++++++++++++---------------- .github/workflows/image-pr.yml | 2 +- .github/workflows/image.yml | 4 +-- .github/workflows/image_build.yml | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 875cfaf2d61d..6e2da168e95d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -42,7 +42,7 @@ jobs: # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-diffusers' @@ -81,7 +81,7 @@ jobs: # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rerankers' @@ -94,7 +94,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-llama-cpp' @@ -107,7 +107,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-vllm' @@ -120,7 +120,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-transformers' @@ -133,7 +133,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-diffusers' @@ -146,7 +146,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' 
tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-kokoro' @@ -159,7 +159,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-faster-whisper' @@ -172,7 +172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-coqui' @@ -185,7 +185,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-bark' @@ -198,7 +198,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-chatterbox' @@ -670,7 +670,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' @@ -777,7 +777,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -817,7 +817,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -868,7 +868,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -895,7 +895,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: 
"12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -946,7 +946,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1028,7 +1028,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -1080,7 +1080,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -1118,7 +1118,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1172,7 +1172,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -1197,7 +1197,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml index 055f26036cfd..2db9e5cbafa4 100644 --- a/.github/workflows/image-pr.yml +++ b/.github/workflows/image-pr.yml @@ -37,7 +37,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'false' tag-suffix: '-gpu-nvidia-cuda-12' diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index ab69c98ac3c4..ad8ce97bcd4d 
100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -84,7 +84,7 @@ jobs: ubuntu-version: '2204' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12' @@ -153,7 +153,7 @@ jobs: include: - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-arm64' diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 31a1f2310ea4..39cfa1401052 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -23,7 +23,7 @@ on: type: string cuda-minor-version: description: 'CUDA minor version' - default: "4" + default: "9" type: string platforms: description: 'Platforms' From 56dae448c853bd37cc2de33b1c8d33cab123ba3b Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 15:00:22 +0100 Subject: [PATCH 04/13] docker: bump base image to ubuntu:24.04 and adjust packages Change base images to ubuntu:24.04, update Vulkan SDK and package names to match the Ubuntu 24.04 repositories. 
--- Dockerfile | 40 ++++++++++++++++++++++++++---------- Dockerfile.aio | 2 +- Makefile | 2 +- backend/Dockerfile.golang | 37 +++++++++++++++++++++++++-------- backend/Dockerfile.llama-cpp | 35 +++++++++++++++++++++++-------- backend/Dockerfile.python | 35 +++++++++++++++++++++++-------- docker-compose.yaml | 2 +- 7 files changed, 114 insertions(+), 39 deletions(-) diff --git a/Dockerfile b/Dockerfile index a253237c974c..8c20a702e65c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 ARG GRPC_BASE_IMAGE=${BASE_IMAGE} ARG INTEL_BASE_IMAGE=${BASE_IMAGE} @@ -9,7 +9,7 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ apt-get install -y --no-install-recommends \ ca-certificates curl wget espeak-ng libgomp1 \ - ffmpeg && \ + ffmpeg libopenblas0 libopenblas-dev libquadmath0 && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -34,11 +34,30 @@ RUN < /run/localai/capability @@ -141,13 +160,12 @@ ENV PATH=/opt/rocm/bin:${PATH} # The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it. 
FROM requirements-drivers AS build-requirements -ARG GO_VERSION=1.22.6 -ARG CMAKE_VERSION=3.26.4 +ARG GO_VERSION=1.25.4 +ARG CMAKE_VERSION=3.28.3 ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT - RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -206,7 +224,7 @@ WORKDIR /build FROM ${INTEL_BASE_IMAGE} AS intel RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg -RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list +RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble/lts/2350 unified" > /etc/apt/sources.list.d/intel-graphics.list RUN apt-get update && \ apt-get install -y --no-install-recommends \ intel-oneapi-runtime-libs && \ diff --git a/Dockerfile.aio b/Dockerfile.aio index 81063bb4dbeb..ccc2fc94b9ed 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} diff --git a/Makefile b/Makefile index 1f855b02a673..450295fcc348 100644 --- a/Makefile +++ b/Makefile @@ -297,7 +297,7 @@ test-extra: prepare-test-extra DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio IMAGE_TYPE?=core -BASE_IMAGE?=ubuntu:22.04 +BASE_IMAGE?=ubuntu:24.04 docker: docker build \ diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index dbfee61e2902..8467521c0fdf 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -1,4 +1,4 @@ -ARG BASE_IMAGE=ubuntu:22.04 +ARG BASE_IMAGE=ubuntu:24.04 FROM ${BASE_IMAGE} AS builder ARG BACKEND=rerankers @@ -12,7 +12,7 @@ ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} ENV DEBIAN_FRONTEND=noninteractive ARG TARGETARCH ARG TARGETVARIANT -ARG GO_VERSION=1.22.6 +ARG GO_VERSION=1.25.4 RUN apt-get update && 
\ apt-get install -y --no-install-recommends \ @@ -38,11 +38,30 @@ RUN < Date: Wed, 3 Dec 2025 19:39:44 +0100 Subject: [PATCH 05/13] backend: Fix context for Python backends Fix context path for all Python backends in GitHub Actions workflows, Makefile and Python backends Dockerfile. --- .github/workflows/backend.yml | 144 +++++++++++++++++----------------- Makefile | 12 +-- backend/Dockerfile.python | 6 +- 3 files changed, 81 insertions(+), 81 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 6e2da168e95d..e03913741e14 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -51,7 +51,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -64,7 +64,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -77,7 +77,7 @@ jobs: skip-drivers: 'true' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -90,7 +90,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -116,7 +116,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -129,7 +129,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -142,7 +142,7 @@ jobs: skip-drivers: 'false' backend:
"diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -155,7 +155,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -168,7 +168,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -181,7 +181,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -194,7 +194,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -207,7 +207,7 @@ jobs: skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -216,7 +216,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -229,7 +229,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -242,11 +242,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: 
"rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -255,11 +255,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -268,11 +268,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # cuda 13 - build-type: 'cublas' @@ -282,11 +282,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rerankers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -295,7 +295,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-llama-cpp' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "llama-cpp" dockerfile: "./backend/Dockerfile.llama-cpp" @@ -321,11 +321,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-transformers' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -334,11 +334,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-diffusers' runs-on: 'ubuntu-latest' - 
base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "13" @@ -352,7 +352,7 @@ jobs: ubuntu-version: '2404' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'cublas' cuda-major-version: "13" cuda-minor-version: "0" @@ -360,11 +360,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-kokoro' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -373,11 +373,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-faster-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -386,11 +386,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-bark' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -399,11 +399,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-chatterbox' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "13" @@ -412,7 +412,7 @@ jobs: tag-latest: 'auto' tag-suffix: 
'-gpu-nvidia-cuda-13-stablediffusion-ggml' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "stablediffusion-ggml" dockerfile: "./backend/Dockerfile.golang" @@ -438,7 +438,7 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-whisper' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "whisper" dockerfile: "./backend/Dockerfile.golang" @@ -464,11 +464,11 @@ jobs: tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-13-rfdetr' runs-on: 'ubuntu-latest' - base-image: "ubuntu:22.04" + base-image: "ubuntu:24.04" skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # hipblas builds - build-type: 'hipblas' @@ -482,7 +482,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -508,7 +508,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -521,7 +521,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -534,7 +534,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # ROCm additional backends - build-type: 'hipblas' @@ -548,7 +548,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -561,7 +561,7 @@ jobs: skip-drivers: 'false' backend: 
"faster-whisper" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -574,7 +574,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -587,7 +587,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # sycl builds - build-type: 'intel' @@ -601,7 +601,7 @@ jobs: skip-drivers: 'false' backend: "rerankers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'sycl_f32' cuda-major-version: "" @@ -640,7 +640,7 @@ jobs: skip-drivers: 'false' backend: "vllm" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -653,7 +653,7 @@ jobs: skip-drivers: 'false' backend: "transformers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -666,7 +666,7 @@ jobs: skip-drivers: 'false' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -679,7 +679,7 @@ jobs: skip-drivers: 'true' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # SYCL additional backends - build-type: 'intel' @@ -693,7 +693,7 @@ jobs: skip-drivers: 'false' backend: "kokoro" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -706,7 +706,7 @@ jobs: skip-drivers: 'false' backend: "faster-whisper" dockerfile: 
"./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -719,7 +719,7 @@ jobs: skip-drivers: 'false' backend: "coqui" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'intel' cuda-major-version: "" @@ -732,7 +732,7 @@ jobs: skip-drivers: 'false' backend: "bark" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # piper - build-type: '' @@ -1024,7 +1024,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1037,7 +1037,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1049,7 +1049,7 @@ jobs: skip-drivers: 'false' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1062,7 +1062,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "rfdetr" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # exllama2 - build-type: '' @@ -1076,7 +1076,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1089,7 +1089,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1101,7 +1101,7 @@ jobs: skip-drivers: 'false' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" 
ubuntu-version: '2404' - build-type: 'hipblas' cuda-major-version: "" @@ -1114,7 +1114,7 @@ jobs: runs-on: 'ubuntu-latest' backend: "exllama2" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1127,7 +1127,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # runs out of space on the runner # - build-type: 'hipblas' @@ -1141,7 +1141,7 @@ jobs: # skip-drivers: 'false' # backend: "rfdetr" # dockerfile: "./backend/Dockerfile.python" - # context: "./backend" + # context: "./" # kitten-tts - build-type: '' cuda-major-version: "" @@ -1154,7 +1154,7 @@ jobs: skip-drivers: 'false' backend: "kitten-tts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' # neutts - build-type: '' @@ -1168,7 +1168,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" @@ -1181,7 +1181,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1193,7 +1193,7 @@ jobs: skip-drivers: 'false' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" @@ -1206,7 +1206,7 @@ jobs: runs-on: 'ubuntu-24.04-arm' backend: "neutts" dockerfile: "./backend/Dockerfile.python" - context: "./backend" + context: "./" ubuntu-version: '2404' backend-jobs-darwin: uses: ./.github/workflows/backend_build_darwin.yml diff --git a/Makefile b/Makefile index 450295fcc348..2a8efe81c827 100644 --- a/Makefile +++ b/Makefile @@ -434,10 +434,10 @@ 
docker-build-huggingface: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . docker-build-rfdetr: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . docker-build-kitten-tts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . docker-save-kitten-tts: backend-images docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar @@ -446,13 +446,13 @@ docker-save-chatterbox: backend-images docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar docker-build-neutts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . 
docker-save-neutts: backend-images docker save local-ai-backend:neutts -o backend-images/neutts.tar docker-build-kokoro: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . docker-build-vllm: docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend @@ -500,7 +500,7 @@ docker-build-transformers: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . docker-build-diffusers: - docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers ./backend + docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . docker-save-diffusers: backend-images docker save local-ai-backend:diffusers -o backend-images/diffusers.tar @@ -521,7 +521,7 @@ docker-build-bark: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . 
docker-build-chatterbox: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . docker-build-exllama2: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index 96e147fce40f..d9a3e1afadb6 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -159,9 +159,9 @@ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y # Install grpcio-tools RUN pip install --break-system-packages --user grpcio-tools==1.71.0 grpcio==1.71.0 torch diffusers transformers compel optimum.quanto accelerate gguf -COPY python/${BACKEND} /${BACKEND} -COPY backend.proto /${BACKEND}/backend.proto -COPY python/common/ /${BACKEND}/common +COPY backend/python/${BACKEND} /${BACKEND} +COPY backend/backend.proto /${BACKEND}/backend.proto +COPY backend/python/common/ /${BACKEND}/common RUN cd /${BACKEND} && PORTABLE_PYTHON=true make From 0d75455cddfb0129cf01ff65cac594841dde562e Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:41:55 +0100 Subject: [PATCH 06/13] make: disable parallel backend builds Add .NOTPARALLEL for backend build target to avoid concurrent build race conditions. 
--- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 2a8efe81c827..d1c448fd36e6 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# Disable parallel execution for backend builds +.NOTPARALLEL: backends/diffusers backends/llama-cpp backends/piper backends/stablediffusion-ggml backends/whisper backends/faster-whisper backends/silero-vad backends/local-store backends/huggingface backends/rfdetr backends/kitten-tts backends/kokoro backends/chatterbox backends/llama-cpp-darwin backends/neutts build-darwin-python-backend build-darwin-go-backend backends/mlx backends/diffuser-darwin backends/mlx-vlm backends/mlx-audio backends/stablediffusion-ggml-darwin + GOCMD=go GOTEST=$(GOCMD) test GOVET=$(GOCMD) vet From 62c2d9fb1c380c5d4c76bbbee212a2491efc7ffa Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:46:47 +0100 Subject: [PATCH 07/13] make: export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION Export CUDA_MAJOR_VERSION and CUDA_MINOR_VERSION so CI/users can override the CUDA version used during building. --- Makefile | 55 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index d1c448fd36e6..960061866c4e 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,8 @@ CUDA_MINOR_VERSION?=0 GORELEASER?= export BUILD_TYPE?= +export CUDA_MAJOR_VERSION?=12 +export CUDA_MINOR_VERSION?=9 GO_TAGS?= BUILD_ID?= @@ -180,7 +182,7 @@ prepare-e2e: mkdir -p $(TEST_DIR) cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml test -e $(TEST_DIR)/ggllm-test-model.bin || wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin - docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=12 --build-arg CUDA_MINOR_VERSION=0 -t localai-tests . 
+ docker build --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t localai-tests . run-e2e-image: ls -liah $(abspath ./tests/e2e-fixtures) @@ -311,16 +313,16 @@ docker: --build-arg BUILD_TYPE=$(BUILD_TYPE) \ -t $(DOCKER_IMAGE) . -docker-cuda11: +docker-cuda12: docker build \ - --build-arg CUDA_MAJOR_VERSION=11 \ - --build-arg CUDA_MINOR_VERSION=8 \ + --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \ + --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ --build-arg IMAGE_TYPE=$(IMAGE_TYPE) \ --build-arg GO_TAGS="$(GO_TAGS)" \ --build-arg MAKEFLAGS="$(DOCKER_MAKEFLAGS)" \ --build-arg BUILD_TYPE=$(BUILD_TYPE) \ - -t $(DOCKER_IMAGE)-cuda-11 . + -t $(DOCKER_IMAGE)-cuda-12 . docker-aio: @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" @@ -422,25 +424,25 @@ backend-images: mkdir -p backend-images docker-build-llama-cpp: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:llama-cpp -f backend/Dockerfile.llama-cpp . docker-build-bark-cpp: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:bark-cpp -f backend/Dockerfile.golang --build-arg BACKEND=bark-cpp . 
docker-build-piper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:piper -f backend/Dockerfile.golang --build-arg BACKEND=piper . docker-build-local-store: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:local-store -f backend/Dockerfile.golang --build-arg BACKEND=local-store . docker-build-huggingface: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:huggingface -f backend/Dockerfile.golang --build-arg BACKEND=huggingface . docker-build-rfdetr: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:rfdetr -f backend/Dockerfile.python --build-arg BACKEND=rfdetr . 
docker-build-kitten-tts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:kitten-tts -f backend/Dockerfile.python --build-arg BACKEND=kitten-tts . docker-save-kitten-tts: backend-images docker save local-ai-backend:kitten-tts -o backend-images/kitten-tts.tar @@ -449,13 +451,13 @@ docker-save-chatterbox: backend-images docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar docker-build-neutts: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts . docker-save-neutts: backend-images docker save local-ai-backend:neutts -o backend-images/neutts.tar docker-build-kokoro: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:kokoro -f backend/Dockerfile.python --build-arg BACKEND=kokoro . 
docker-build-vllm: docker build --build-arg CUDA_MAJOR_VERSION=$(CUDA_MAJOR_VERSION) --build-arg CUDA_MINOR_VERSION=$(CUDA_MINOR_VERSION) --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm ./backend @@ -476,7 +478,7 @@ docker-save-local-store: backend-images docker save local-ai-backend:local-store -o backend-images/local-store.tar docker-build-silero-vad: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:silero-vad -f backend/Dockerfile.golang --build-arg BACKEND=silero-vad . docker-save-silero-vad: backend-images docker save local-ai-backend:silero-vad -o backend-images/silero-vad.tar @@ -491,43 +493,46 @@ docker-save-bark-cpp: backend-images docker save local-ai-backend:bark-cpp -o backend-images/bark-cpp.tar docker-build-stablediffusion-ggml: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:stablediffusion-ggml -f backend/Dockerfile.golang --build-arg BACKEND=stablediffusion-ggml . 
docker-save-stablediffusion-ggml: backend-images docker save local-ai-backend:stablediffusion-ggml -o backend-images/stablediffusion-ggml.tar docker-build-rerankers: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:rerankers -f backend/Dockerfile.python --build-arg BACKEND=rerankers . + +docker-build-vllm: + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:vllm -f backend/Dockerfile.python --build-arg BACKEND=vllm . docker-build-transformers: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:transformers -f backend/Dockerfile.python --build-arg BACKEND=transformers . docker-build-diffusers: - docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . + docker build --progress=plain --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:diffusers -f backend/Dockerfile.python --build-arg BACKEND=diffusers . 
docker-save-diffusers: backend-images docker save local-ai-backend:diffusers -o backend-images/diffusers.tar docker-build-whisper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:whisper -f backend/Dockerfile.golang --build-arg BACKEND=whisper . docker-save-whisper: backend-images docker save local-ai-backend:whisper -o backend-images/whisper.tar docker-build-faster-whisper: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . docker-build-coqui: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . docker-build-bark: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . 
+ docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:bark -f backend/Dockerfile.python --build-arg BACKEND=bark . docker-build-chatterbox: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox . docker-build-exllama2: - docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . + docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 . 
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2 From b466fcbe2b4e1950ddf0c27b1d3f7c05441d1082 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:48:25 +0100 Subject: [PATCH 08/13] make: add backends/faster-whisper and docker-save-faster-whisper targets --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 960061866c4e..30aa9a14bac3 100644 --- a/Makefile +++ b/Makefile @@ -363,6 +363,9 @@ backends/stablediffusion-ggml: docker-build-stablediffusion-ggml docker-save-sta backends/whisper: docker-build-whisper docker-save-whisper build ./local-ai backends install "ocifile://$(abspath ./backend-images/whisper.tar)" +backends/faster-whisper: docker-build-faster-whisper docker-save-faster-whisper build + ./local-ai backends install "ocifile://$(abspath ./backend-images/faster-whisper.tar)" + backends/silero-vad: docker-build-silero-vad docker-save-silero-vad build ./local-ai backends install "ocifile://$(abspath ./backend-images/silero-vad.tar)" @@ -522,6 +525,9 @@ docker-save-whisper: backend-images docker-build-faster-whisper: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:faster-whisper -f backend/Dockerfile.python --build-arg BACKEND=faster-whisper . 
+docker-save-faster-whisper: + docker save local-ai-backend:faster-whisper -o backend-images/faster-whisper.tar + docker-build-coqui: docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) --build-arg CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} --build-arg CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION} -t local-ai-backend:coqui -f backend/Dockerfile.python --build-arg BACKEND=coqui . From 89eb4fd4349aab1ef3cd65d7ec552a67a3d6d3c6 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:51:57 +0100 Subject: [PATCH 09/13] backend: update backend Dockerfiles for Ubuntu 24.04 Align backend Dockerfiles to build with Ubuntu 24.04 base image. --- backend/Dockerfile.golang | 2 ++ backend/Dockerfile.python | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/Dockerfile.golang b/backend/Dockerfile.golang index 8467521c0fdf..93c2b2ccaa47 100644 --- a/backend/Dockerfile.golang +++ b/backend/Dockerfile.golang @@ -142,6 +142,8 @@ EOT COPY . /LocalAI +RUN git config --global --add safe.directory /LocalAI + RUN cd /LocalAI && make protogen-go && make -C /LocalAI/backend/go/${BACKEND} build FROM scratch diff --git a/backend/Dockerfile.python b/backend/Dockerfile.python index d9a3e1afadb6..afd88efafaf8 100644 --- a/backend/Dockerfile.python +++ b/backend/Dockerfile.python @@ -32,7 +32,7 @@ RUN apt-get update && \ python3-venv make cmake && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* && \ - pip install --upgrade pip + pip install --break-system-packages --user --upgrade pip # Cuda From 612010685b2ca4c32351e5fc613922c70d04f8d3 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:55:31 +0100 Subject: [PATCH 10/13] backend: add ROCm env vars and default AMDGPU_TARGETS for hipblas build in stablediffusion-ggml Provide ROCM_HOME/ROCM_PATH and export CC/CXX to make possible to build stablediffusion-ggml with ROCm libraries. 
--- backend/go/stablediffusion-ggml/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/go/stablediffusion-ggml/Makefile b/backend/go/stablediffusion-ggml/Makefile index c1c22680b001..0dd7cb88a344 100644 --- a/backend/go/stablediffusion-ggml/Makefile +++ b/backend/go/stablediffusion-ggml/Makefile @@ -28,7 +28,12 @@ else ifeq ($(BUILD_TYPE),clblas) CMAKE_ARGS+=-DGGML_CLBLAST=ON -DCLBlast_DIR=/some/path # If it's hipblas we do have also to set CC=/opt/rocm/llvm/bin/clang CXX=/opt/rocm/llvm/bin/clang++ else ifeq ($(BUILD_TYPE),hipblas) - CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON + ROCM_HOME ?= /opt/rocm + ROCM_PATH ?= /opt/rocm + export CXX=$(ROCM_HOME)/llvm/bin/clang++ + export CC=$(ROCM_HOME)/llvm/bin/clang + AMDGPU_TARGETS?=gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102,gfx1200,gfx1201 + CMAKE_ARGS+=-DSD_HIPBLAS=ON -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=$(AMDGPU_TARGETS) else ifeq ($(BUILD_TYPE),vulkan) CMAKE_ARGS+=-DSD_VULKAN=ON -DGGML_VULKAN=ON else ifeq ($(OS),Darwin) From fd7592b1a432cfd858254b1ef0b06fa69bd835c5 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 19:56:06 +0100 Subject: [PATCH 11/13] chatterbox: bump ROCm PyTorch to 2.9.1+rocm6.4 and update index url Align chatterbox hipblas requirements with ROCm 6.4-compatible wheels. 
--- backend/python/chatterbox/requirements-hipblas.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/python/chatterbox/requirements-hipblas.txt b/backend/python/chatterbox/requirements-hipblas.txt index 6c21992a7585..ed30fb824107 100644 --- a/backend/python/chatterbox/requirements-hipblas.txt +++ b/backend/python/chatterbox/requirements-hipblas.txt @@ -1,6 +1,6 @@ ---extra-index-url https://download.pytorch.org/whl/rocm6.0 -torch==2.6.0+rocm6.1 -torchaudio==2.6.0+rocm6.1 +--extra-index-url https://download.pytorch.org/whl/rocm6.4 +torch==2.9.1+rocm6.4 +torchaudio==2.9.1+rocm6.4 transformers numpy>=1.24.0,<1.26.0 # https://github.com/mudler/LocalAI/pull/6240#issuecomment-3329518289 From 58026e773f7de5ceb6338773e919d77f5ddb6c78 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 20:36:16 +0100 Subject: [PATCH 12/13] misc: add local-ai-launcher to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index caae10a218a1..2ee2ab8588b1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ go-bert # LocalAI build binary LocalAI /local-ai +/local-ai-launcher # prevent above rules from omitting the helm chart !charts/* # prevent above rules from omitting the api/localai folder From 3ed9d65a081dff30589044716cc4c1bea7add115 Mon Sep 17 00:00:00 2001 From: Alessandro Sturniolo Date: Wed, 3 Dec 2025 21:27:29 +0100 Subject: [PATCH 13/13] ci: fix GitHub Actions backends workflows after rebase --- .github/workflows/backend.yml | 80 +++++------------------------------ 1 file changed, 10 insertions(+), 70 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index e03913741e14..c6ec48315d27 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -39,7 +39,6 @@ jobs: #max-parallel: ${{ github.event_name != 'pull_request' && 6 || 4 }} matrix: include: - # CUDA 11 builds - build-type: 'l4t' cuda-major-version: "12" 
cuda-minor-version: "9" @@ -64,7 +63,7 @@ jobs: skip-drivers: 'true' backend: "diffusers" dockerfile: "./backend/Dockerfile.python" - context: "./"" + context: "./" ubuntu-version: '2404' - build-type: '' cuda-major-version: "" @@ -78,6 +77,7 @@ jobs: backend: "chatterbox" dockerfile: "./backend/Dockerfile.python" context: "./" + ubuntu-version: '2404' # CUDA 12 builds - build-type: 'cublas' cuda-major-version: "12" @@ -211,7 +211,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' @@ -224,7 +224,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-whisper' @@ -237,7 +237,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' @@ -250,7 +250,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-exllama2' @@ -263,7 +263,7 @@ jobs: ubuntu-version: '2404' - build-type: 'cublas' cuda-major-version: "12" - cuda-minor-version: "0" + cuda-minor-version: "9" platforms: 'linux/amd64' tag-latest: 'auto' tag-suffix: '-gpu-nvidia-cuda-12-neutts' @@ -670,7 +670,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' tag-latest: 'auto' tag-suffix: '-nvidia-l4t-kokoro' @@ -815,18 +815,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - 
cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-stablediffusion-ggml' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "stablediffusion-ggml" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -893,18 +881,6 @@ jobs: dockerfile: "./backend/Dockerfile.golang" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-whisper' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "whisper" - dockerfile: "./backend/Dockerfile.golang" - context: "./" - build-type: 'sycl_f32' cuda-major-version: "" cuda-minor-version: "" @@ -1026,18 +1002,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-rfdetr' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "rfdetr" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1078,18 +1042,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-exllama2' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "exllama2" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'intel' cuda-major-version: "" cuda-minor-version: "" @@ -1118,7 +1070,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - 
cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto' @@ -1170,18 +1122,6 @@ jobs: dockerfile: "./backend/Dockerfile.python" context: "./" ubuntu-version: '2404' - - build-type: 'cublas' - cuda-major-version: "12" - cuda-minor-version: "9" - platforms: 'linux/amd64' - tag-latest: 'auto' - tag-suffix: '-gpu-nvidia-cuda-12-neutts' - runs-on: 'ubuntu-latest' - base-image: "ubuntu:24.04" - skip-drivers: 'false' - backend: "neutts" - dockerfile: "./backend/Dockerfile.python" - context: "./" - build-type: 'hipblas' cuda-major-version: "" cuda-minor-version: "" @@ -1197,7 +1137,7 @@ jobs: ubuntu-version: '2404' - build-type: 'l4t' cuda-major-version: "12" - cuda-minor-version: "9" + cuda-minor-version: "0" platforms: 'linux/arm64' skip-drivers: 'true' tag-latest: 'auto'