Merged
Changes from all commits (73 commits)
f667f1e
convert : fix broken sentencepiece vocab (#14416)
CISC Jun 27, 2025
8d94219
ggml : add ggml_set_rows (#14274)
rgerganov Jun 27, 2025
4367806
recurrent : call balloc split_reset() in init_batch() (#14414)
ggerganov Jun 27, 2025
72babea
graph : make llm_graph_context destructor virtual (#14410)
ggerganov Jun 27, 2025
ceb1bf5
vulkan: Fix GGML_VULKAN_SHADER_DEBUG_INFO (#14427)
jeffbolznv Jun 28, 2025
6609507
ci : fix windows build and release (#14431)
CISC Jun 28, 2025
b25e927
fix async_mode bug (#14432)
bachelor-dou Jun 28, 2025
566c16f
model : add support for ERNIE 4.5 0.3B model (#14408)
ownia Jun 28, 2025
00d5282
vulkan: lock accesses of pinned_memory vector (#14333)
jeffbolznv Jun 28, 2025
63a7bb3
vulkan: handle noncontig in the final case of ggml_vk_get_cpy_pipelin…
jeffbolznv Jun 28, 2025
27208bf
CUDA: add bf16 and f32 support to cublas_mul_mat_batched (#14361)
am17an Jun 28, 2025
bd9c981
vulkan: Add fusion support for RMS_NORM+MUL (#14366)
jeffbolznv Jun 29, 2025
a0535ff
ggml : implement REGLU/GEGLU/SWIGLU ops (#14158)
CISC Jun 29, 2025
a5d1fb6
ggml : fix unmerged GGML_FPxx_TO_FPxx refactoring (#14443)
CISC Jun 29, 2025
f47c1d7
SYCL: disable faulty fp16 exp kernel (#14395)
qnixsynapse Jun 29, 2025
83790b0
server : fix appearance of the chats list context menu for Safari (#1…
rntk Jun 29, 2025
caf5681
server : support jinja extra template kwargs (Qwen3 enable_thinking f…
matteoserva Jun 29, 2025
e9b6350
scripts : make the shell scripts cross-platform (#14341)
vedranmiletic Jun 30, 2025
c839a2d
cmake : Remove redundant include path in CMakeLists.txt (#14452)
xiaobing318 Jun 30, 2025
eb3fa29
test-backend-ops : disable llama test (#14461)
slaren Jun 30, 2025
a7417f5
ggml-cpu: sycl: Re-enable exp f16 (#14462)
Rbiessy Jun 30, 2025
5dd942d
metal : disable fast-math for some cpy kernels (#14460)
ggerganov Jun 30, 2025
745f11f
memory : correctly handle failure in apply() (#14438)
ggerganov Jun 30, 2025
0a5a3b5
Add Conv2d for CPU (#14388)
am17an Jun 30, 2025
79b33b2
opencl : add GEGLU, REGLU, SWIGLU (#14456)
lhez Jul 1, 2025
497be7c
ggml-quants : rename best_mad to best_error (ggml/1283)
danbev Jun 24, 2025
431b2c2
ggml-cpu : "align corners" for bilinear upscale/downscale (ggml/1285)
Acly Jul 1, 2025
f61c05d
sync : ggml
ggerganov Jul 1, 2025
a6a4795
ggml : remove trailing whitespace (#0)
ggerganov Jul 1, 2025
eff5e45
add GELU_ERF (#14455)
CISC Jul 1, 2025
6a746cf
vulkan: Split large mul_mat_id to fit in shared memory (#14451)
jeffbolznv Jul 1, 2025
343b6e9
CANN: update aclnnGroupedMatmulV2 to aclnnGroupedMatmulV3 (#14411)
noemotiovon Jul 1, 2025
1b2aaf2
Add Vulkan images to docker.md (#14472)
xek Jul 1, 2025
de56944
ci : disable fast-math for Metal GHA CI (#14478)
ggerganov Jul 1, 2025
68b3cd6
ggml : Callback before abort (#14481)
ScaledLizard Jul 2, 2025
85841e1
github : add OpenCL backend to issue templates (#14492)
EZForever Jul 2, 2025
611ba4b
ci : add OpenCL to labeler workflow (#14496)
CISC Jul 2, 2025
603e43d
opencl : update upscale to support align corners (#14488)
lhez Jul 2, 2025
c8a4e47
opencl : skip empty nodes on cgraph compute (#14491)
EZForever Jul 2, 2025
d7f5f4e
simple-chat : fix context-exceeded condition (#14494)
ggerganov Jul 2, 2025
307e79d
opencl : fix possible buffer overflow in dump_tensor (#14490)
jeffzhou2000 Jul 2, 2025
ec68e84
ggml : support bcast ggml_soft_max_ext, ggml_flash_attn_ext (#14435)
ggerganov Jun 27, 2025
8875523
vulkan: support softmax/FA batch and broadcast (#14449)
jeffbolznv Jul 1, 2025
12a81af
CUDA: broadcasting for FlashAttention mask (#14500)
JohannesGaessler Jul 2, 2025
55a1c5a
CUDA: add softmax broadcast (#14475)
am17an Jul 2, 2025
f3ed38d
Set RPATH to "@loader_path" / "$ORIGIN" to ensure executables and dyn…
rotemdan Jul 2, 2025
c46944a
ggml : add version function to get lib version (ggml/1286)
danbev Jul 2, 2025
e17991c
sync : ggml
ggerganov Jul 2, 2025
5d46bab
llama : initial Mamba-2 support (#9126)
compilade Jul 2, 2025
e75ba4c
gguf-py : add support for chat template jinja files (#14508)
CISC Jul 2, 2025
55c2646
CUDA: add dynamic shared mem to softmax, refactor general usage (#14497)
am17an Jul 2, 2025
d4cdd9c
ggml : remove kompute backend (#14501)
ggerganov Jul 3, 2025
9067487
ggml : fix FA mask dim 2 and 3 (#14505)
ggerganov Jul 3, 2025
a70c8a0
kv-cache : use ggml_set_rows (#14285)
ggerganov Jul 3, 2025
0c2ee38
convert : correct gemma 3n conversion (#14450)
ngxson Jul 3, 2025
7b63a71
Fix conditional enabling following arch checks for ggml-sycl (#14504)
s-Nick Jul 3, 2025
c8c4495
ggml: backward pass for split swiglu (#14483)
JohannesGaessler Jul 3, 2025
2b72bed
vulkan: support mixed/deepseekR1 FA head sizes (#14509)
jeffbolznv Jul 3, 2025
bee2842
opencl : broadcast for soft_max (#14510)
lhez Jul 3, 2025
28657a8
ggml : implement GEGLU_ERF and GEGLU_QUICK ops (#14445)
CISC Jul 3, 2025
499a8f5
CANN: Replace aclrtMemsetSync with aclnnInplaceZero operator (#14002)
luyhcsu Jul 4, 2025
c79184d
batch : add n_used count (#14512)
ggerganov Jul 4, 2025
7b50f7c
graph : prepare for 4D mask (#14515)
ggerganov Jul 4, 2025
67d1ef2
batch : add optional for sequential equal split (#14511)
ggerganov Jul 4, 2025
ef797db
metal : disable fast math in all quantize kernels (#14528)
ggerganov Jul 4, 2025
b81510a
test-backend-ops: add support for specifying output format (#14368)
yeahdongcn Jul 5, 2025
bac8bed
eval-callback : check for empty input (#14539)
ggerganov Jul 5, 2025
6681688
opencl: add GELU_ERF (#14476)
CISC Jul 5, 2025
ddef995
server : fix assistant prefilling when content is an array (#14360)
CISC Jul 5, 2025
a0374a6
vulkan: Handle updated FA dim2/3 definition (#14518)
jeffbolznv Jul 5, 2025
e592be1
vulkan: fix rms_norm+mul fusion (#14545)
jeffbolznv Jul 6, 2025
6491d6e
vulkan: increase LOAD_VEC_A to 8 (IQ1/IQ2) or 4 (IQ3) (#14485)
netrunnereve Jul 6, 2025
8b273a5
Merge branch 'layla-build' into merge
l3utterfly Jul 6, 2025
2 changes: 1 addition & 1 deletion .devops/tools.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
set -e

# Read the first argument into a variable
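The shebang swap above comes from the cross-platform scripts change (#14341) and is repeated in build-xcframework.sh and ci/run.sh below: /usr/bin/env resolves bash from PATH, whereas the hard-coded /bin/bash is absent on NixOS and frozen at version 3.2 on stock macOS. A minimal sketch of the difference:

#!/usr/bin/env bash
# env searches PATH, so a newer Homebrew- or Nix-provided bash is picked up;
# the old shebang always ran /bin/bash and failed where that path is missing.
echo "running under bash ${BASH_VERSION}"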
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/010-bug-compilation.yml
@@ -40,7 +40,7 @@ body:
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
-options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
multiple: true
validations:
required: true
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/011-bug-results.yml
@@ -42,7 +42,7 @@ body:
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
-options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
+options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL]
multiple: true
validations:
required: true
11 changes: 5 additions & 6 deletions .github/labeler.yml
@@ -1,10 +1,4 @@
# https://github.com/actions/labeler
-Kompute:
-  - changed-files:
-      - any-glob-to-any-file:
-          - ggml/include/ggml-kompute.h
-          - ggml/src/ggml-kompute/**
-          - README-kompute.md
Apple Metal:
  - changed-files:
      - any-glob-to-any-file:
@@ -93,3 +87,8 @@ Ascend NPU:
          - ggml/include/ggml-cann.h
          - ggml/src/ggml-cann/**
          - docs/backend/CANN.md
+OpenCL:
+  - changed-files:
+      - any-glob-to-any-file:
+          - ggml/include/ggml-opencl.h
+          - ggml/src/ggml-opencl/**
30 changes: 15 additions & 15 deletions .github/workflows/build.yml
@@ -84,7 +84,8 @@ jobs:
-DCMAKE_BUILD_RPATH="@loader_path" \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_METAL_USE_BF16=ON \
- -DGGML_METAL_EMBED_LIBRARY=ON \
+ -DGGML_METAL_EMBED_LIBRARY=OFF \
+ -DGGML_METAL_SHADER_DEBUG=ON \
-DGGML_RPC=ON
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

@@ -664,7 +665,7 @@ jobs:
./build-xcframework.sh

windows-msys2:
-runs-on: windows-latest
+runs-on: windows-2025

strategy:
fail-fast: false
@@ -714,7 +715,7 @@ jobs:
cmake --build build --config ${{ matrix.build }} -j $(nproc)

windows-latest-cmake:
-runs-on: windows-latest
+runs-on: windows-2025

env:
OPENBLAS_VERSION: 0.3.23
@@ -725,17 +726,20 @@
matrix:
include:
          - build: 'cpu-x64 (static)'
+           arch: 'x64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
          - build: 'openblas-x64'
+           arch: 'x64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
          - build: 'vulkan-x64'
+           arch: 'x64'
            defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
          - build: 'llvm-arm64'
+           arch: 'arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
          - build: 'llvm-arm64-opencl-adreno'
+           arch: 'arm64'
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
-         # - build: 'kompute-x64'
-         #   defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'

steps:
- name: Clone
@@ -749,12 +753,6 @@
variant: ccache
evict-old-files: 1d

-      - name: Clone Kompute submodule
-        id: clone_kompute
-        if: ${{ matrix.build == 'kompute-x64' }}
-        run: |
-          git submodule update --init ggml/src/ggml-kompute/kompute

- name: Download OpenBLAS
id: get_openblas
if: ${{ matrix.build == 'openblas-x64' }}
@@ -770,7 +768,7 @@

- name: Install Vulkan SDK
id: get_vulkan
-if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
+if: ${{ matrix.build == 'vulkan-x64' }}
run: |
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
@@ -805,6 +803,8 @@ jobs:
- name: libCURL
id: get_libcurl
uses: ./.github/actions/windows-setup-curl
+with:
+  architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}

- name: Build
id: cmake_build
@@ -825,7 +825,7 @@

- name: Test
id: cmake_test
-if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
+if: ${{ matrix.arch == 'x64' }}
run: |
cd build
ctest -L main -C Release --verbose --timeout 900
@@ -930,7 +930,7 @@
cmake --build build --config Release

windows-latest-cmake-sycl:
-runs-on: windows-latest
+runs-on: windows-2022

defaults:
run:
@@ -964,7 +964,7 @@

windows-latest-cmake-hip:
if: ${{ github.event.inputs.create_release != 'true' }}
-runs-on: windows-latest
+runs-on: windows-2022

steps:
- name: Clone
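The macOS job above now builds the Metal shaders with debug info instead of embedding the shader library. The equivalent local configure, sketched from the flags shown in the diff:

cmake -B build \
    -DGGML_METAL_USE_BF16=ON \
    -DGGML_METAL_EMBED_LIBRARY=OFF \
    -DGGML_METAL_SHADER_DEBUG=ON \
    -DGGML_RPC=ON
cmake --build build --config Release -j "$(sysctl -n hw.logicalcpu)"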
22 changes: 14 additions & 8 deletions .github/workflows/release.yml
@@ -49,7 +49,8 @@ jobs:
run: |
sysctl -a
cmake -B build \
- -DCMAKE_BUILD_RPATH="@loader_path" \
+ -DCMAKE_INSTALL_RPATH='@loader_path' \
+ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
@@ -103,7 +104,8 @@ jobs:
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
# https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
cmake -B build \
- -DCMAKE_BUILD_RPATH="@loader_path" \
+ -DCMAKE_INSTALL_RPATH='@loader_path' \
+ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_METAL=OFF \
-DGGML_RPC=ON
@@ -160,6 +162,8 @@ jobs:
id: cmake_build
run: |
cmake -B build \
+ -DCMAKE_INSTALL_RPATH='$ORIGIN' \
+ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DGGML_BACKEND_DL=ON \
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -211,6 +215,8 @@ jobs:
id: cmake_build
run: |
cmake -B build \
+ -DCMAKE_INSTALL_RPATH='$ORIGIN' \
+ -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DGGML_BACKEND_DL=ON \
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
@@ -235,7 +241,7 @@
name: llama-bin-ubuntu-vulkan-x64.zip

windows-cpu:
-runs-on: windows-latest
+runs-on: windows-2025

strategy:
matrix:
@@ -271,7 +277,7 @@
env:
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch }}
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
cmake -S . -B build -G "Ninja Multi-Config" ^
-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
-DGGML_NATIVE=OFF ^
@@ -288,7 +294,7 @@
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
run: |
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.42.34433\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

- name: Upload artifacts
@@ -298,7 +304,7 @@
name: llama-bin-win-cpu-${{ matrix.arch }}.zip

windows:
-runs-on: windows-latest
+runs-on: windows-2025

env:
OPENBLAS_VERSION: 0.3.23
@@ -448,7 +454,7 @@
name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

windows-sycl:
-runs-on: windows-latest
+runs-on: windows-2022

defaults:
run:
@@ -520,7 +526,7 @@
name: llama-bin-win-sycl-x64.zip

windows-hip:
-runs-on: windows-latest
+runs-on: windows-2022

strategy:
matrix:
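The RPATH edits in this file swap the build-tree-only CMAKE_BUILD_RPATH for an install RPATH baked into the released binaries (commit f3ed38d), so executables and shared libraries locate their bundled dependencies relative to their own path. A quick local check, sketched with example binary names:

# macOS: the Mach-O load commands should list @loader_path
otool -l build/bin/llama-cli | grep -A2 LC_RPATH

# Linux: $ORIGIN should appear in the dynamic section
readelf -d build/bin/llama-cli | grep -E 'RPATH|RUNPATH'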
3 changes: 0 additions & 3 deletions .gitmodules
@@ -1,3 +0,0 @@
-[submodule "kompute"]
-  path = ggml/src/ggml-kompute/kompute
-  url = https://github.com/nomic-ai/kompute.git
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -138,7 +138,6 @@ endfunction()

llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA)
llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA)
-llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
2 changes: 1 addition & 1 deletion build-xcframework.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Options
IOS_MIN_OS_VERSION=16.4
2 changes: 1 addition & 1 deletion ci/run.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# sample usage:
#
10 changes: 10 additions & 0 deletions common/arg.cpp
@@ -2794,6 +2794,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.ssl_file_cert = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE"));
+    add_opt(common_arg(
+        {"--chat-template-kwargs"}, "STRING",
+        string_format("sets additional params for the json template parser"),
+        [](common_params & params, const std::string & value) {
+            auto parsed = json::parse(value);
+            for (const auto & item : parsed.items()) {
+                params.default_template_kwargs[item.key()] = item.value().dump();
+            }
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_CHAT_TEMPLATE_KWARGS"));
add_opt(common_arg(
{"-to", "--timeout"}, "N",
string_format("server read/write timeout in seconds (default: %d)", params.timeout_read),
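For context, the new --chat-template-kwargs flag (commit caf5681) passes extra variables through to the Jinja chat template, with Qwen3's enable_thinking switch as the motivating case. A hypothetical invocation, assuming a Jinja-capable template and a placeholder model path:

# disable Qwen3 "thinking" via an extra template variable
llama-server -m ./qwen3-8b.gguf --jinja \
    --chat-template-kwargs '{"enable_thinking": false}'

# equivalent, using the environment variable registered above
LLAMA_CHAT_TEMPLATE_KWARGS='{"enable_thinking": false}' \
    llama-server -m ./qwen3-8b.gguf --jinja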