CodeLinaro · lhez · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/.devops/tools.sh b/.devops/tools.sh
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
     exec ./llama-quantize "$@"
 elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
     exec ./llama-cli "$@"
+elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+    exec ./llama-bench "$@"
+elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+    exec ./llama-perplexity "$@"
 elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
     echo "Converting PTH to GGML..."
-    for i in `ls $1/$2/ggml-model-f16.bin*`; do
+    for i in $(ls $1/$2/ggml-model-f16.bin*); do
         if [ -f "${i/f16/q4_0}" ]; then
             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
         else
@@ -30,6 +34,10 @@ else
     echo "Available commands: "
     echo "  --run (-r): Run a model previously converted into ggml"
     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+    echo "              ex: -m model.gguf"
+    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+    echo "              ex: -m model.gguf -f file.txt"
     echo "  --convert (-c): Convert a llama model into ggml"
     echo "              ex: --outtype f16 \"/models/7B/\" "
     echo "  --quantize (-q): Optimize with quantization process ggml"

diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile
@@ -1,4 +1,4 @@
-ARG UBUNTU_VERSION=22.04
+ARG UBUNTU_VERSION=24.04
 
 FROM ubuntu:$UBUNTU_VERSION AS build
 
@@ -7,7 +7,7 @@ RUN apt update && apt install -y git build-essential cmake wget
 
 # Install Vulkan SDK and cURL
 RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
     apt update -y && \
     apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
 
@@ -55,8 +55,9 @@ RUN apt-get update \
     git \
     python3 \
     python3-pip \
-    && pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt \
+    python3-wheel \
+    && pip install --break-system-packages --upgrade setuptools \
+    && pip install --break-system-packages -r requirements.txt \
     && apt autoremove -y \
     && apt clean -y \
     && rm -rf /tmp/* /var/tmp/* \

diff --git a/.editorconfig b/.editorconfig
@@ -40,3 +40,11 @@ indent_style = tab
 [examples/cvector-generator/*.txt]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
+
+[models/templates/*.jinja]
+indent_style = unset
+indent_size = unset
+end_of_line = unset
+charset = unset
+trim_trailing_whitespace = unset
+insert_final_newline = unset
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -43,6 +43,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-arm64
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         continue-on-error: true
@@ -108,6 +114,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-x64
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         continue-on-error: true
@@ -172,6 +184,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-cpu-cmake
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         run: |
@@ -249,6 +267,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         run: |
@@ -296,6 +320,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-latest-cmake-rpc
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         run: |
@@ -325,6 +355,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-vulkan
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         run: |
@@ -346,7 +382,8 @@ jobs:
         id: cmake_test
         run: |
           cd build
-          ctest -L main --verbose --timeout 900
+          # This is using llvmpipe and runs slower than other backends
+          ctest -L main --verbose --timeout 1800
 
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
@@ -363,6 +400,12 @@ jobs:
           sudo apt-get update
           sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-hip
+          evict-old-files: 1d
+
       - name: Build with native CMake HIP support
         id: cmake_build
         run: |
@@ -395,6 +438,12 @@ jobs:
           apt-get update
           apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-musa
+          evict-old-files: 1d
+
       - name: Build with native CMake MUSA support
         id: cmake_build
         run: |
@@ -434,6 +483,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-sycl
+          evict-old-files: 1d
+
       - name: Build
         id: cmake_build
         run: |
@@ -478,6 +533,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ubuntu-22-cmake-sycl-fp16
+          evict-old-files: 1d
+
       - name: Build
         id: cmake_build
         run: |
@@ -499,6 +560,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-ios
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         continue-on-error: true
@@ -530,6 +597,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-cmake-tvos
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         continue-on-error: true
@@ -565,6 +638,12 @@ jobs:
         id: checkout
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: macOS-latest-swift
+          evict-old-files: 1d
+
       - name: Dependencies
         id: depends
         continue-on-error: true
@@ -606,6 +685,12 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-msys2
+          evict-old-files: 1d
+
       - name: Setup ${{ matrix.sys }}
         uses: msys2/setup-msys2@v2
         with:
@@ -674,6 +759,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-${{ matrix.build }}
+          evict-old-files: 1d
+
       - name: Clone Kompute submodule
         id: clone_kompute
         if: ${{ matrix.build == 'kompute-x64' }}
@@ -812,6 +903,8 @@ jobs:
         - name: Clone
           id: checkout
           uses: actions/checkout@v4
+          with:
+            fetch-depth: 0
 
         - name: Install dependencies
           env:
@@ -820,6 +913,12 @@ jobs:
               apt update
               apt install -y cmake build-essential ninja-build libgomp1 git
 
+        - name: ccache
+          uses: hendrikmuhs/ccache-action@v1.2.16
+          with:
+            key: ubuntu-latest-cmake-cuda
+            evict-old-files: 1d
+
         - name: Build with CMake
           run: |
             cmake -S . -B build -G Ninja \
@@ -846,6 +945,12 @@ jobs:
         with:
             fetch-depth: 0
 
+      - name: Install ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
+          evict-old-files: 1d
+
       - name: Install Cuda Toolkit 11.7
         if: ${{ matrix.cuda == '11.7' }}
         run: |
@@ -902,11 +1007,6 @@ jobs:
           echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
           echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
 
-      - name: Install ccache
-        uses: hendrikmuhs/ccache-action@v1.2.16
-        with:
-          key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
-
       - name: Install Ninja
         id: install_ninja
         run: |
@@ -986,6 +1086,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-sycl
+          evict-old-files: 1d
+
       - name: Install
         run:  |
           scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
@@ -1065,9 +1171,10 @@ jobs:
           & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
 
       - name: Install ccache
-        uses: hendrikmuhs/ccache-action@v1.2
+        uses: hendrikmuhs/ccache-action@v1.2.16
         with:
           key: ${{ github.job }}
+          evict-old-files: 1d
 
       - name: Build
         id: cmake_build
@@ -1097,6 +1204,12 @@ jobs:
         with:
             fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: windows-latest-cmake-hip-release
+          evict-old-files: 1d
+
       - name: Install
         id: depends
         run: |
@@ -1194,6 +1307,12 @@ jobs:
       - name: Clone
         uses: actions/checkout@v4
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: android-build
+          evict-old-files: 1d
+
       - name: Set up JDK
         uses: actions/setup-java@v3
         with:
@@ -1231,6 +1350,12 @@ jobs:
         with:
           fetch-depth: 0
 
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2.16
+        with:
+          key: release
+          evict-old-files: 1d
+
       - name: Determine tag name
         id: tag
         shell: bash

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -28,16 +28,15 @@ jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     env:
       COMMIT_SHA: ${{ github.sha }}
     strategy:
       fail-fast: false
       matrix:
         config:
           # Multi-stage build
-          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
-          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/arm64", full: true, light: true, server: true, freediskspace: false}
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
           - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
           - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}