Skip to content

Commit c382c28

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents d23892e + 699f439 commit c382c28

File tree

149 files changed

+5343
-5932
lines changed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

149 files changed

+5343
-5932
lines changed

.github/ISSUE_TEMPLATE/010-bug-compilation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ body:
4040
attributes:
4141
label: GGML backends
4242
description: Which GGML backends do you know to be affected?
43-
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
43+
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
4444
multiple: true
4545
validations:
4646
required: true

.github/ISSUE_TEMPLATE/011-bug-results.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ body:
4242
attributes:
4343
label: GGML backends
4444
description: Which GGML backends do you know to be affected?
45-
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
45+
options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
4646
multiple: true
4747
validations:
4848
required: true

.github/labeler.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
11
# https://github.com/actions/labeler
2-
Kompute:
3-
- changed-files:
4-
- any-glob-to-any-file:
5-
- ggml/include/ggml-kompute.h
6-
- ggml/src/ggml-kompute/**
7-
- README-kompute.md
82
Apple Metal:
93
- changed-files:
104
- any-glob-to-any-file:
@@ -93,3 +87,8 @@ Ascend NPU:
9387
- ggml/include/ggml-cann.h
9488
- ggml/src/ggml-cann/**
9589
- docs/backend/CANN.md
90+
OpenCL:
91+
- changed-files:
92+
- any-glob-to-any-file:
93+
- ggml/include/ggml-opencl.h
94+
- ggml/src/ggml-opencl/**

.github/workflows/build.yml

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ jobs:
342342
cd build
343343
export GGML_VK_VISIBLE_DEVICES=0
344344
# This is using llvmpipe and runs slower than other backends
345-
ctest -L main --verbose --timeout 3600
345+
ctest -L main --verbose --timeout 4200
346346
347347
ubuntu-22-cmake-hip:
348348
runs-on: ubuntu-22.04
@@ -740,9 +740,6 @@ jobs:
740740
- build: 'llvm-arm64-opencl-adreno'
741741
arch: 'arm64'
742742
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
743-
# - build: 'kompute-x64'
744-
# arch: 'x64'
745-
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
746743

747744
steps:
748745
- name: Clone
@@ -756,12 +753,6 @@ jobs:
756753
variant: ccache
757754
evict-old-files: 1d
758755

759-
- name: Clone Kompute submodule
760-
id: clone_kompute
761-
if: ${{ matrix.build == 'kompute-x64' }}
762-
run: |
763-
git submodule update --init ggml/src/ggml-kompute/kompute
764-
765756
- name: Download OpenBLAS
766757
id: get_openblas
767758
if: ${{ matrix.build == 'openblas-x64' }}
@@ -777,7 +768,7 @@ jobs:
777768
778769
- name: Install Vulkan SDK
779770
id: get_vulkan
780-
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
771+
if: ${{ matrix.build == 'vulkan-x64' }}
781772
run: |
782773
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
783774
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install

.github/workflows/release.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ jobs:
4949
run: |
5050
sysctl -a
5151
cmake -B build \
52-
-DCMAKE_BUILD_RPATH="@loader_path" \
52+
-DCMAKE_INSTALL_RPATH='@loader_path' \
53+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
5354
-DLLAMA_FATAL_WARNINGS=ON \
5455
-DGGML_METAL_USE_BF16=ON \
5556
-DGGML_METAL_EMBED_LIBRARY=ON \
@@ -103,7 +104,8 @@ jobs:
103104
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
104105
# https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
105106
cmake -B build \
106-
-DCMAKE_BUILD_RPATH="@loader_path" \
107+
-DCMAKE_INSTALL_RPATH='@loader_path' \
108+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
107109
-DLLAMA_FATAL_WARNINGS=ON \
108110
-DGGML_METAL=OFF \
109111
-DGGML_RPC=ON
@@ -160,6 +162,8 @@ jobs:
160162
id: cmake_build
161163
run: |
162164
cmake -B build \
165+
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
166+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
163167
-DGGML_BACKEND_DL=ON \
164168
-DGGML_NATIVE=OFF \
165169
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -211,6 +215,8 @@ jobs:
211215
id: cmake_build
212216
run: |
213217
cmake -B build \
218+
-DCMAKE_INSTALL_RPATH='$ORIGIN' \
219+
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
214220
-DGGML_BACKEND_DL=ON \
215221
-DGGML_NATIVE=OFF \
216222
-DGGML_CPU_ALL_VARIANTS=ON \

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +0,0 @@
1-
[submodule "kompute"]
2-
path = ggml/src/ggml-kompute/kompute
3-
url = https://github.com/nomic-ai/kompute.git

CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ endfunction()
120120

121121
llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
122122
llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
123-
llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
124123
llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
125124
llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
126125
llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)

common/arg.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2738,6 +2738,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27382738
params.public_path = value;
27392739
}
27402740
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
2741+
add_opt(common_arg(
2742+
{"--api-prefix"}, "PREFIX",
2743+
string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
2744+
[](common_params & params, const std::string & value) {
2745+
params.api_prefix = value;
2746+
}
2747+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
27412748
add_opt(common_arg(
27422749
{"--no-webui"},
27432750
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,7 @@ struct common_params {
371371

372372
std::string hostname = "127.0.0.1";
373373
std::string public_path = ""; // NOLINT
374+
std::string api_prefix = ""; // NOLINT
374375
std::string chat_template = ""; // NOLINT
375376
bool use_jinja = false; // NOLINT
376377
bool enable_chat_template = true;

0 commit comments

Comments
 (0)