Changes from all commits (58 commits)
19e899c  scripts: n_depth for compare-llama-bench [no ci] (#13201)  (JohannesGaessler, Apr 29, 2025)
a0f7016  rpc : fix cache directory initialization (#13188)  (hbuxiaofei, Apr 30, 2025)
da84c04  docker : do not build tests (#13204)  (ngxson, Apr 30, 2025)
5933e6f  arg : allow using -hf offline (#13202)  (ngxson, Apr 30, 2025)
44cd8d9  feat(ggml-cpu): enable z17 compile (#13182)  (taronaeo, Apr 30, 2025)
07c2e2f  convert : correct typo image_mean --> image_std (#13208)  (ngxson, Apr 30, 2025)
4163137  ggml : fix ppc64le build (#13176)  (shalinib-ibm, Apr 30, 2025)
e5007a5  vulkan: use uint array index to avoid glslang bug (#13193)  (jeffbolznv, Apr 30, 2025)
3b127c7  common : add -jf / --json-schema-file flag (#12011)  (ochafik, Apr 30, 2025)
ceda28e  llava : remove duplicate include (#13207)  (tattn, Apr 30, 2025)
3e168be  convert : improve model arch handling (#13122)  (ngxson, Apr 30, 2025)
16a457f  fix typo: `n_ctx_pre_seq` -> `n_ctx_per_seq` (#13221)  (ddh0, Apr 30, 2025)
6f67cf1  arg : -hf do not fail if url mismatch (#13219)  (ngxson, Apr 30, 2025)
e1e8e09  CUDA: batched+noncont MMQ, refactor bs>1 MoE code (#13199)  (JohannesGaessler, Apr 30, 2025)
9998540  cuda : fix unused variable compile warning (whisper/0)  (ggerganov, Apr 24, 2025)
4254bb4  ggml : fix ggml_gallocr_ptr type (ggml/1205)  (slaren, Apr 30, 2025)
8d33d74  sync : ggml  (ggerganov, May 1, 2025)
a70183e  llama-model : fix the reported size class for nomic-embed-text-v2-moe…  (cebtenzzre, May 1, 2025)
13c9a33  arg : remove CURLINFO_EFFECTIVE_METHOD (#13228)  (ngxson, May 1, 2025)
8936784  mtmd : add **vision** support for Mistral Small 3.1 (#13231)  (ngxson, May 1, 2025)
b5769d9  ggml : suppress Windows compiler warnings (whisper/3075)  (danbev, Apr 29, 2025)
99881f7  whisper : add check that target name exists (whisper/3103)  (danbev, May 1, 2025)
b1dd4d0  sync : ggml  (ggerganov, May 1, 2025)
b0ecbd4  test: non-cont. b in test-backend-ops -o MUL_MAT (#13187)  (JohannesGaessler, May 1, 2025)
fc727bc  vulkan: Handle src1 batch dimension in non-contiguous mat-vec-mul sha…  (jeffbolznv, May 1, 2025)
79f26e9  vulkan: Add bfloat16 support (#12554)  (jeffbolznv, May 1, 2025)
e0f572c  llama-chat : update GLM4 chat template (#13238)  (matteoserva, May 1, 2025)
b6e4ff6  clip : (minicpmv) Re-enable upscaling of images smaller than the CLIP…  (lcarrere, May 1, 2025)
d7a14c4  build : fix build info on windows (#13239)  (slaren, May 1, 2025)
f057808  ggml: Don't assert fail when tensor data changes (#13222)  (jessegross, May 1, 2025)
8efbdad  rpc : avoid uninitialized memory in serialize_tensor (#13210)  (justinsb, May 1, 2025)
d24d592  ci: fix cross-compile sync issues (#12804)  (bandoti, May 1, 2025)
dcf8860  convert : explicitly disable trust_remote_code for AutoConfig (#13246)  (ngxson, May 2, 2025)
fab647e  server : add cache reuse card link to help (#13230)  (ggerganov, May 2, 2025)
e84773a  mtmd-cli : fix out_of_range when input image path is empty (#13244)  (ahmedshakill, May 2, 2025)
2af6880  llama-chat : reset glmedge chat template (#13253)  (piDack, May 2, 2025)
626083f  llama : plamo rope type is neox (#13260)  (CISC, May 2, 2025)
cb06a3c  llama : orion rope type is neox (#13261)  (CISC, May 2, 2025)
c642bc0  kv-cache : separate recurrent vs non-recurrent impl (#12799)  (ggerganov, May 2, 2025)
074e42a  convert : converting mmproj for Qwen2/2.5VL from convert_hf_to_gguf (…  (ngxson, May 2, 2025)
7d21234  convert : use correct context length for nomic-embed-text-v2 (#13216)  (cebtenzzre, May 2, 2025)
2f56761  llama-model : support Qwen2 embedding models and pooling_mode_lasttok…  (cebtenzzre, May 2, 2025)
3f3769b  ggml : Enable MMA for BF16 in llamafile_sgemm (#13148)  (shalinib-ibm, May 2, 2025)
a75cb30  context : fix reorder logic (#13267)  (ggerganov, May 2, 2025)
b344439  sync : ggml (#13268)  (ggerganov, May 2, 2025)
1d36b36  llama : move end-user examples to tools directory (#13249)  (slaren, May 2, 2025)
3bf785f  llama : Llama-3_1-Nemotron-Ultra-253B-v1 support (#12843)  (ymcki, May 3, 2025)
36667c8  clip : revert the change of BOI/EOI token for GLM-edge (⚠️ breaking c…  (ngxson, May 3, 2025)
3e959f0  imatrix: fix oob writes if src1 is not contiguous (#13286)  (JohannesGaessler, May 3, 2025)
8ae5ebc  vulkan: Additional type support for unary, binary, and copy (#13266)  (jeffbolznv, May 4, 2025)
8afbd96  CUDA: fix race condition in MMQ ids_dst (#13294)  (JohannesGaessler, May 4, 2025)
93c4e23  CUDA: fix race condition in MMQ stream-k fixup (#13299)  (JohannesGaessler, May 4, 2025)
9f2da58  llama : build windows releases with dl backends (#13220)  (slaren, May 4, 2025)
86bd60d  llava/mtmd : fixes to fully support dl backends (#13303)  (slaren, May 4, 2025)
6eb7d25  ggml : activate s390x simd for Q3_K (#13301)  (taronaeo, May 4, 2025)
9fdfcda  rpc : use backend registry, support dl backends (#13304)  (slaren, May 4, 2025)
27aa259  mtmd : add C public API (#13184)  (ngxson, May 4, 2025)
a483ab2  Merge branch 'dev' into update-dev-from-master-2025-05-05-00-09  (vansangpfiev, May 5, 2025)
4 changes: 2 additions & 2 deletions .devops/cpu.Dockerfile
@@ -14,9 +14,9 @@ WORKDIR /app
COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
-cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
elif [ "$TARGETARCH" = "arm64" ]; then \
-cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
else \
echo "Unsupported architecture"; \
exit 1; \
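For reference, the amd64 branch above is equivalent to the following stand-alone configure/build sequence. This is a minimal sketch, run from a llama.cpp checkout outside Docker; the final build command is assumed to match the image's later build step:

```sh
# Sketch: local equivalent of the amd64 image build above.
# -DLLAMA_BUILD_TESTS=OFF is the flag this PR adds so that
# container images skip compiling the test binaries.
cmake -S . -B build \
  -DCMAKE_BUILD_TYPE=Release \
  -DGGML_NATIVE=OFF \
  -DLLAMA_BUILD_TESTS=OFF \
  -DGGML_BACKEND_DL=ON \
  -DGGML_CPU_ALL_VARIANTS=ON
cmake --build build --config Release -j"$(nproc)"
```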
2 changes: 1 addition & 1 deletion .devops/cuda.Dockerfile
@@ -21,7 +21,7 @@ COPY . .
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
-cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
2 changes: 1 addition & 1 deletion .devops/intel.Dockerfile
@@ -17,7 +17,7 @@ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
fi && \
echo "Building with dynamic libs" && \
-cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${OPT_SYCL_F16} && \
+cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
2 changes: 1 addition & 1 deletion .devops/llama-cli-cann.Dockerfile
@@ -22,7 +22,7 @@ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
-cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
+cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
2 changes: 1 addition & 1 deletion .devops/musa.Dockerfile
@@ -35,7 +35,7 @@ COPY . .
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
fi && \
-cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
2 changes: 1 addition & 1 deletion .devops/rocm.Dockerfile
@@ -40,7 +40,7 @@ WORKDIR /app
COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
+cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
2 changes: 1 addition & 1 deletion .devops/vulkan.Dockerfile
@@ -16,7 +16,7 @@ WORKDIR /app

COPY . .

-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
+RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
8 changes: 4 additions & 4 deletions .editorconfig
@@ -21,23 +21,23 @@ indent_style = tab
[prompts/*.txt]
insert_final_newline = unset

-[examples/server/public/*]
+[tools/server/public/*]
indent_size = 2

-[examples/server/public/deps_*]
+[tools/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

-[examples/server/deps_*]
+[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

-[examples/cvector-generator/*.txt]
+[tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

3 changes: 2 additions & 1 deletion .flake8
@@ -2,8 +2,9 @@
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
-# Do not traverse examples
+# Do not traverse examples and tools
examples,
+tools,
# Do not include package initializers
__init__.py,
# No need to traverse our git directory
6 changes: 4 additions & 2 deletions .github/labeler.yml
@@ -45,7 +45,9 @@ build:
- CMakePresets.json
examples:
- changed-files:
-- any-glob-to-any-file: examples/**
+- any-glob-to-any-file:
+- examples/**
+- tools/**
devops:
- changed-files:
- any-glob-to-any-file:
@@ -70,7 +72,7 @@ android:
server:
- changed-files:
- any-glob-to-any-file:
-- examples/server/**
+- tools/server/**
ggml:
- changed-files:
- any-glob-to-any-file:
30 changes: 15 additions & 15 deletions .github/workflows/bench.yml.disabled
@@ -27,10 +27,10 @@ on:
push:
branches:
- master
-paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
pull_request_target:
types: [opened, synchronize, reopened]
-paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
schedule:
- cron: '04 2 * * *'

@@ -69,7 +69,7 @@ jobs:
- name: Install python env
id: pipenv
run: |
-cd examples/server/bench
+cd tools/server/bench
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
@@ -79,7 +79,7 @@
run: |
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
tar xzf prometheus*.tar.gz --strip-components=1
-./prometheus --config.file=examples/server/bench/prometheus.yml &
+./prometheus --config.file=tools/server/bench/prometheus.yml &
while ! nc -z localhost 9090; do
sleep 0.1
done
@@ -92,7 +92,7 @@
- name: Install k6 and xk6-sse
id: k6_installation
run: |
-cd examples/server/bench
+cd tools/server/bench
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
@@ -116,7 +116,7 @@
- name: Download the dataset
id: download_dataset
run: |
-cd examples/server/bench
+cd tools/server/bench
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

- name: Server bench
@@ -126,7 +126,7 @@
run: |
set -eux

-cd examples/server/bench
+cd tools/server/bench
source venv/bin/activate
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
@@ -157,9 +157,9 @@ jobs:
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
compression-level: 9
path: |
-examples/server/bench/*.jpg
-examples/server/bench/*.json
-examples/server/bench/*.log
+tools/server/bench/*.jpg
+tools/server/bench/*.json
+tools/server/bench/*.log

- name: Commit status
uses: Sibz/github-status-action@v1
@@ -178,17 +178,17 @@
with:
client_id: ${{secrets.IMGUR_CLIENT_ID}}
path: |
-examples/server/bench/prompt_tokens_seconds.jpg
-examples/server/bench/predicted_tokens_seconds.jpg
-examples/server/bench/kv_cache_usage_ratio.jpg
-examples/server/bench/requests_processing.jpg
+tools/server/bench/prompt_tokens_seconds.jpg
+tools/server/bench/predicted_tokens_seconds.jpg
+tools/server/bench/kv_cache_usage_ratio.jpg
+tools/server/bench/requests_processing.jpg

- name: Extract mermaid
id: set_mermaid
run: |
set -eux

-cd examples/server/bench
+cd tools/server/bench
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
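The relocated bench harness can still be exercised locally by following the same steps as the workflow. A minimal sketch; only --runner-label appears in this diff, and the remaining bench.py arguments are elided here, so treat the invocation as illustrative:

```sh
# Sketch: run the server bench harness from its new tools/ location.
# Mirrors the workflow steps above; assumes a built llama-server and
# the ShareGPT dataset downloaded as in the workflow.
cd tools/server/bench
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
python bench.py --runner-label local   # further flags elided in this diff
```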
66 changes: 42 additions & 24 deletions .github/workflows/build-linux-cross.yml
@@ -4,18 +4,25 @@ on:
workflow_call:

jobs:
-ubuntu-latest-riscv64-cpu-cross:
-runs-on: ubuntu-latest
+ubuntu-24-riscv64-cpu-cross:
+runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
- name: Setup Riscv
run: |
sudo dpkg --add-architecture riscv64
-sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-    /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-sudo apt-get clean
-sudo apt-get update

+# Add arch-specific repositories for non-amd64 architectures
+cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+EOF
+
+sudo apt-get update || true ;# Prevent failure due to missing URLs.

sudo apt-get install -y --no-install-recommends \
build-essential \
gcc-14-riscv64-linux-gnu \
@@ -27,6 +34,7 @@ jobs:
cmake -B build -DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
+-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -40,21 +48,25 @@

cmake --build build --config Release -j $(nproc)

-ubuntu-latest-riscv64-vulkan-cross:
-runs-on: ubuntu-latest
+ubuntu-24-riscv64-vulkan-cross:
+runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
-with:
-fetch-depth: 0

- name: Setup Riscv
run: |
sudo dpkg --add-architecture riscv64
-sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-    /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-sudo apt-get clean
-sudo apt-get update

+# Add arch-specific repositories for non-amd64 architectures
+cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+EOF
+
+sudo apt-get update || true ;# Prevent failure due to missing URLs.

sudo apt-get install -y --no-install-recommends \
build-essential \
glslc \
@@ -69,6 +81,7 @@
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
+-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -82,21 +95,25 @@

cmake --build build --config Release -j $(nproc)

-ubuntu-latest-arm64-vulkan-cross:
-runs-on: ubuntu-latest
+ubuntu-24-arm64-vulkan-cross:
+runs-on: ubuntu-24.04

steps:
- uses: actions/checkout@v4
-with:
-fetch-depth: 0

- name: Setup Arm64
run: |
sudo dpkg --add-architecture arm64
-sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-    /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-sudo apt-get clean
-sudo apt-get update

+# Add arch-specific repositories for non-amd64 architectures
+cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+EOF
+
+sudo apt-get update || true ;# Prevent failure due to missing URLs.

sudo apt-get install -y --no-install-recommends \
build-essential \
glslc \
Expand All @@ -110,6 +127,7 @@ jobs:
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
+-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
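All three cross-compile jobs now share the same pattern for installing foreign-architecture packages on ubuntu-24.04: register the architecture with dpkg, point apt at ports.ubuntu.com for that architecture only, and tolerate update failures from the default amd64 mirrors. A generalized sketch of that pattern (ARCH is an illustrative parameter; the workflow hard-codes riscv64 or arm64):

```sh
# Sketch of the multi-arch apt setup used by the jobs above.
ARCH=riscv64
sudo dpkg --add-architecture "$ARCH"

# Arch-qualified entries so apt fetches $ARCH packages from ubuntu-ports
# without touching the default amd64 sources.
cat << EOF | sudo tee /etc/apt/sources.list.d/${ARCH}-ports.list
deb [arch=$ARCH] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
deb [arch=$ARCH] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
deb [arch=$ARCH] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
deb [arch=$ARCH] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
EOF

# The default mirrors carry no $ARCH indexes; don't fail the job on that.
sudo apt-get update || true
```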