Skip to content

Commit c382c28

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents d23892e + 699f439 commit c382c28

File tree

149 files changed

+5343
-5932
lines changed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

149 files changed

+5343
-5932
lines changed

.github/ISSUE_TEMPLATE/010-bug-compilation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ body:
4040
attributes:
4141
label: GGML backends
4242
description: Which GGML backends do you know to be affected?
43-
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
43+
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
4444
multiple: true
4545
validations:
4646
required: true

.github/ISSUE_TEMPLATE/011-bug-results.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ body:
4242
attributes:
4343
label: GGML backends
4444
description: Which GGML backends do you know to be affected?
45-
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
45+
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
4646
multiple: true
4747
validations:
4848
required: true

.github/labeler.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
11
# https://github.com/actions/labeler
2-
Kompute:
3-
- changed-files:
4-
- any-glob-to-any-file:
5-
- ggml/include/ggml-kompute.h
6-
- ggml/src/ggml-kompute/**
7-
- README-kompute.md
82
Apple Metal:
93
- changed-files:
104
- any-glob-to-any-file:
@@ -93,3 +87,8 @@ Ascend NPU:
9387
- ggml/include/ggml-cann.h
9488
- ggml/src/ggml-cann/**
9589
- docs/backend/CANN.md
90+
OpenCL:
91+
- changed-files:
92+
- any-glob-to-any-file:
93+
- ggml/include/ggml-opencl.h
94+
- ggml/src/ggml-opencl/**

.github/workflows/build.yml

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ jobs:
342342
cd build
343343
export GGML_VK_VISIBLE_DEVICES=0
344344
# This is using llvmpipe and runs slower than other backends
345-
ctest -L main --verbose --timeout 3600
345+
ctest -L main --verbose --timeout 4200
346346
347347
ubuntu-22-cmake-hip:
348348
runs-on: ubuntu-22.04
@@ -740,9 +740,6 @@ jobs:
740740
- build: 'llvm-arm64-opencl-adreno'
741741
arch: 'arm64'
742742
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
743-
# - build: 'kompute-x64'
744-
# arch: 'x64'
745-
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
746743

747744
steps:
748745
- name: Clone
@@ -756,12 +753,6 @@ jobs:
756753
variant: ccache
757754
evict-old-files: 1d
758755

759-
- name: Clone Kompute submodule
760-
id: clone_kompute
761-
if: ${{ matrix.build == 'kompute-x64' }}
762-
run: |
763-
git submodule update --init ggml/src/ggml-kompute/kompute
764-
765756
- name: Download OpenBLAS
766757
id: get_openblas
767758
if: ${{ matrix.build == 'openblas-x64' }}
@@ -777,7 +768,7 @@ jobs:
777768
778769
- name: Install Vulkan SDK
779770
id: get_vulkan
780-
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
771+
if: ${{ matrix.build == 'vulkan-x64' }}
781772
run: |
782773
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
783774
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install

.github/workflows/release.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ jobs:
4949
run: |
5050
sysctl -a
5151
cmake -B build \
52-
-DCMAKE_BUILD_RPATH="@loader_path" \
52+
-DCMAKE_INSTALL_RPATH='@loader_path' \
53+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
5354
-DLLAMA_FATAL_WARNINGS=ON \
5455
-DGGML_METAL_USE_BF16=ON \
5556
-DGGML_METAL_EMBED_LIBRARY=ON \
@@ -103,7 +104,8 @@ jobs:
103104
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
104105
# https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
105106
cmake -B build \
106-
-DCMAKE_BUILD_RPATH="@loader_path" \
107+
-DCMAKE_INSTALL_RPATH='@loader_path' \
108+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
107109
-DLLAMA_FATAL_WARNINGS=ON \
108110
-DGGML_METAL=OFF \
109111
-DGGML_RPC=ON
@@ -160,6 +162,8 @@ jobs:
160162
id: cmake_build
161163
run: |
162164
cmake -B build \
165+
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
166+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
163167
-DGGML_BACKEND_DL=ON \
164168
-DGGML_NATIVE=OFF \
165169
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -211,6 +215,8 @@ jobs:
211215
id: cmake_build
212216
run: |
213217
cmake -B build \
218+
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
219+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
214220
-DGGML_BACKEND_DL=ON \
215221
-DGGML_NATIVE=OFF \
216222
-DGGML_CPU_ALL_VARIANTS=ON \

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
[submodule "kompute"]
2-
path = ggml/src/ggml-kompute/kompute
3-
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ endfunction()
120120

121121
llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
122122
llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
123-
llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
124123
llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
125124
llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
126125
llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)

common/arg.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,6 +2738,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27382738
params.public_path = value;
27392739
}
27402740
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
2741+
add_opt(common_arg(
2742+
{"--api-prefix"}, "PREFIX",
2743+
string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
2744+
[](common_params & params, const std::string & value) {
2745+
params.api_prefix = value;
2746+
}
2747+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
27412748
add_opt(common_arg(
27422749
{"--no-webui"},
27432750
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ struct common_params {
371371

372372
std::string hostname = "127.0.0.1";
373373
std::string public_path = ""; // NOLINT
374+
std::string api_prefix = ""; // NOLINT
374375
std::string chat_template = ""; // NOLINT
375376
bool use_jinja = false; // NOLINT
376377
bool enable_chat_template = true;

0 commit comments

Comments
 (0)