Skip to content

Commit 9a0093b

Browse files
committed
Merge branch 'master' into dev-refactoring
2 parents 332514c + caf5681 commit 9a0093b

File tree

212 files changed

+38927
-24183
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

212 files changed

+38927
-24183
lines changed

.devops/intel.Dockerfile

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,23 @@ COPY --from=build /app/full /app
4949

5050
WORKDIR /app
5151

52-
RUN apt-get update \
53-
&& apt-get install -y \
54-
git \
55-
python3 \
56-
python3-pip \
57-
&& pip install --upgrade pip setuptools wheel \
58-
&& pip install -r requirements.txt \
59-
&& apt autoremove -y \
60-
&& apt clean -y \
61-
&& rm -rf /tmp/* /var/tmp/* \
62-
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
63-
&& find /var/cache -type f -delete
64-
52+
RUN apt-get update && \
53+
apt-get install -y \
54+
git \
55+
python3 \
56+
python3-pip \
57+
python3-venv && \
58+
python3 -m venv /opt/venv && \
59+
. /opt/venv/bin/activate && \
60+
pip install --upgrade pip setuptools wheel && \
61+
pip install -r requirements.txt && \
62+
apt autoremove -y && \
63+
apt clean -y && \
64+
rm -rf /tmp/* /var/tmp/* && \
65+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
66+
find /var/cache -type f -delete
67+
68+
ENV PATH="/opt/venv/bin:$PATH"
6569

6670
ENTRYPOINT ["/app/tools.sh"]
6771

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: Build relocatable cmake package
2+
on:
3+
workflow_dispatch:
4+
workflow_call:
5+
6+
jobs:
7+
linux:
8+
runs-on: ubuntu-24.04
9+
steps:
10+
- uses: actions/checkout@v4
11+
with:
12+
fetch-depth: 0
13+
14+
- name: Install dependencies
15+
run: |
16+
sudo apt update
17+
sudo apt install -y build-essential tcl
18+
19+
- name: Build
20+
run: |
21+
PREFIX="$(pwd)"/inst
22+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
23+
-DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
24+
-DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
25+
cmake --build build --config Release
26+
cmake --install build --prefix "$PREFIX" --config Release
27+
28+
export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
29+
tclsh <<'EOF'
30+
set build(commit) [string trim [exec git rev-parse --short HEAD]]
31+
set build(number) [string trim [exec git rev-list --count HEAD]]
32+
set build(version) "0.0.$build(number)"
33+
34+
set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]]
35+
set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \
36+
"set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \
37+
"set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"]
38+
39+
puts -nonewline "Checking llama-config.cmake version... "
40+
foreach check $checks {
41+
if {![regexp -expanded -- $check $llamaconfig]} {
42+
puts "\"$check\" failed!"
43+
exit 1
44+
}
45+
}
46+
puts "success."
47+
EOF
48+
49+
cd examples/simple-cmake-pkg
50+
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
51+
cmake --build build

.github/workflows/build.yml

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,43 @@ on:
55
push:
66
branches:
77
- master
8-
paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
8+
paths: [
9+
'.github/workflows/build.yml',
10+
'.github/workflows/build-linux-cross.yml',
11+
'.github/workflows/build-cmake-pkg.yml',
12+
'**/CMakeLists.txt',
13+
'**/.cmake',
14+
'**/*.h',
15+
'**/*.hpp',
16+
'**/*.c',
17+
'**/*.cpp',
18+
'**/*.cu',
19+
'**/*.cuh',
20+
'**/*.swift',
21+
'**/*.m',
22+
'**/*.metal',
23+
'**/*.comp'
24+
]
25+
926
pull_request:
1027
types: [opened, synchronize, reopened]
11-
paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
28+
paths: [
29+
'.github/workflows/build.yml',
30+
'.github/workflows/build-linux-cross.yml',
31+
'.github/workflows/build-cmake-pkg.yml',
32+
'**/CMakeLists.txt',
33+
'**/.cmake',
34+
'**/*.h',
35+
'**/*.hpp',
36+
'**/*.c',
37+
'**/*.cpp',
38+
'**/*.cu',
39+
'**/*.cuh',
40+
'**/*.swift',
41+
'**/*.m',
42+
'**/*.metal',
43+
'**/*.comp'
44+
]
1245

1346
concurrency:
1447
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -306,6 +339,7 @@ jobs:
306339
id: cmake_test
307340
run: |
308341
cd build
342+
export GGML_VK_VISIBLE_DEVICES=0
309343
# This is using llvmpipe and runs slower than other backends
310344
ctest -L main --verbose --timeout 3600
311345
@@ -477,6 +511,9 @@ jobs:
477511
build-linux-cross:
478512
uses: ./.github/workflows/build-linux-cross.yml
479513

514+
build-cmake-pkg:
515+
uses: ./.github/workflows/build-cmake-pkg.yml
516+
480517
macOS-latest-cmake-ios:
481518
runs-on: macos-latest
482519

@@ -627,7 +664,7 @@ jobs:
627664
./build-xcframework.sh
628665
629666
windows-msys2:
630-
runs-on: windows-latest
667+
runs-on: windows-2025
631668

632669
strategy:
633670
fail-fast: false
@@ -677,27 +714,33 @@ jobs:
677714
cmake --build build --config ${{ matrix.build }} -j $(nproc)
678715
679716
windows-latest-cmake:
680-
runs-on: windows-latest
717+
runs-on: windows-2025
681718

682719
env:
683720
OPENBLAS_VERSION: 0.3.23
684721
SDE_VERSION: 9.33.0-2024-01-07
685-
VULKAN_VERSION: 1.4.309.0
722+
VULKAN_VERSION: 1.4.313.2
686723

687724
strategy:
688725
matrix:
689726
include:
690-
- build: 'cpu-x64'
691-
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF'
727+
- build: 'cpu-x64 (static)'
728+
arch: 'x64'
729+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
692730
- build: 'openblas-x64'
731+
arch: 'x64'
693732
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
694733
- build: 'vulkan-x64'
695-
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
734+
arch: 'x64'
735+
defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
696736
- build: 'llvm-arm64'
737+
arch: 'arm64'
697738
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
698739
- build: 'llvm-arm64-opencl-adreno'
740+
arch: 'arm64'
699741
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
700742
# - build: 'kompute-x64'
743+
# arch: 'x64'
701744
# defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
702745

703746
steps:
@@ -735,7 +778,7 @@ jobs:
735778
id: get_vulkan
736779
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
737780
run: |
738-
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
781+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
739782
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
740783
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
741784
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
@@ -768,6 +811,8 @@ jobs:
768811
- name: libCURL
769812
id: get_libcurl
770813
uses: ./.github/actions/windows-setup-curl
814+
with:
815+
architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }}
771816

772817
- name: Build
773818
id: cmake_build
@@ -777,6 +822,7 @@ jobs:
777822
cmake -S . -B build ${{ matrix.defines }} `
778823
-DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
779824
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
825+
cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release
780826
781827
- name: Add libopenblas.dll
782828
id: add_libopenblas_dll
@@ -787,7 +833,7 @@ jobs:
787833
788834
- name: Test
789835
id: cmake_test
790-
if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }}
836+
if: ${{ matrix.arch == 'x64' }}
791837
run: |
792838
cd build
793839
ctest -L main -C Release --verbose --timeout 900
@@ -892,7 +938,7 @@ jobs:
892938
cmake --build build --config Release
893939
894940
windows-latest-cmake-sycl:
895-
runs-on: windows-latest
941+
runs-on: windows-2022
896942

897943
defaults:
898944
run:
@@ -926,7 +972,7 @@ jobs:
926972

927973
windows-latest-cmake-hip:
928974
if: ${{ github.event.inputs.create_release != 'true' }}
929-
runs-on: windows-latest
975+
runs-on: windows-2022
930976

931977
steps:
932978
- name: Clone

.github/workflows/release.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ jobs:
235235
name: llama-bin-ubuntu-vulkan-x64.zip
236236

237237
windows-cpu:
238-
runs-on: windows-latest
238+
runs-on: windows-2025
239239

240240
strategy:
241241
matrix:
@@ -271,7 +271,7 @@ jobs:
271271
env:
272272
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
273273
run: |
274-
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch }}
274+
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
275275
cmake -S . -B build -G "Ninja Multi-Config" ^
276276
-D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
277277
-DGGML_NATIVE=OFF ^
@@ -288,7 +288,7 @@ jobs:
288288
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
289289
run: |
290290
Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\
291-
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.42.34433\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
291+
Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
292292
7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*
293293
294294
- name: Upload artifacts
@@ -298,11 +298,11 @@ jobs:
298298
name: llama-bin-win-cpu-${{ matrix.arch }}.zip
299299

300300
windows:
301-
runs-on: windows-latest
301+
runs-on: windows-2025
302302

303303
env:
304304
OPENBLAS_VERSION: 0.3.23
305-
VULKAN_VERSION: 1.4.309.0
305+
VULKAN_VERSION: 1.4.313.2
306306

307307
strategy:
308308
matrix:
@@ -332,7 +332,7 @@ jobs:
332332
id: get_vulkan
333333
if: ${{ matrix.backend == 'vulkan' }}
334334
run: |
335-
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
335+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
336336
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
337337
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
338338
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
@@ -448,7 +448,7 @@ jobs:
448448
name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
449449

450450
windows-sycl:
451-
runs-on: windows-latest
451+
runs-on: windows-2022
452452

453453
defaults:
454454
run:
@@ -520,7 +520,7 @@ jobs:
520520
name: llama-bin-win-sycl-x64.zip
521521

522522
windows-hip:
523-
runs-on: windows-latest
523+
runs-on: windows-2022
524524

525525
strategy:
526526
matrix:

CMakeLists.txt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
8989
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
9090
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
9191

92+
if (NOT DEFINED LLAMA_BUILD_NUMBER)
93+
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
94+
endif()
95+
if (NOT DEFINED LLAMA_BUILD_COMMIT)
96+
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
97+
endif()
98+
set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER})
99+
92100
# override ggml options
93101
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
94102
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -156,6 +164,8 @@ if (LLAMA_USE_SYSTEM_GGML)
156164
endif()
157165

158166
if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
167+
set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
168+
set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
159169
add_subdirectory(ggml)
160170
# ... otherwise assume ggml is added by a parent CMakeLists.txt
161171
endif()
@@ -205,10 +215,6 @@ endif()
205215
include(GNUInstallDirs)
206216
include(CMakePackageConfigHelpers)
207217

208-
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
209-
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
210-
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
211-
212218
set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
213219
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
214220
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ ifdef LLAMA_SERVER_SSL
367367
endif
368368

369369
ifndef GGML_NO_CPU_AARCH64
370-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
370+
MK_CPPFLAGS += -DGGML_USE_CPU_REPACK
371371
endif
372372

373373
# warnings
@@ -970,7 +970,7 @@ OBJ_GGML = \
970970
$(DIR_GGML)/src/ggml-threading.o \
971971
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
972972
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
973-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
973+
$(DIR_GGML)/src/ggml-cpu/repack.o \
974974
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
975975
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
976976
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
77
[![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
88

9-
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggml-org/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
9+
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
1010

1111
Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
1212

@@ -18,7 +18,6 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
1818
## Hot topics
1919

2020
- 🔥 Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
21-
- **GGML developer experience survey (organized and reviewed by NVIDIA):** [link](https://forms.gle/Gasw3cRgyhNEnrwK9)
2221
- A new binary `llama-mtmd-cli` is introduced to replace `llava-cli`, `minicpmv-cli`, `gemma3-cli` ([#13012](https://github.com/ggml-org/llama.cpp/pull/13012)) and `qwen2vl-cli` ([#13141](https://github.com/ggml-org/llama.cpp/pull/13141)), `libllava` will be deprecated
2322
- VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
2423
- Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639

0 commit comments

Comments
 (0)