Skip to content

Commit f0282b5

Browse files
committed
sync ggml to llama @ b8595b16e69e3029e06be3b8f6635f9812b2bc3f
1 parent 5ebd4ba commit f0282b5

File tree

572 files changed

+73486
-25459
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

572 files changed

+73486
-25459
lines changed

ggml/CMakeLists.txt

Lines changed: 49 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,40 @@
11
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
2-
project("ggml" C CXX)
2+
project("ggml" C CXX ASM)
3+
4+
### GGML Version
5+
set(GGML_VERSION_MAJOR 0)
6+
set(GGML_VERSION_MINOR 9)
7+
set(GGML_VERSION_PATCH 4)
8+
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
9+
10+
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
11+
if(GIT_EXE)
12+
# Get current git commit hash
13+
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
14+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
15+
OUTPUT_VARIABLE GGML_BUILD_COMMIT
16+
OUTPUT_STRIP_TRAILING_WHITESPACE
17+
ERROR_QUIET
18+
)
19+
20+
# Check if the working directory is dirty (i.e., has uncommitted changes)
21+
execute_process(COMMAND ${GIT_EXE} diff-index --quiet HEAD -- .
22+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
23+
RESULT_VARIABLE GGML_GIT_DIRTY
24+
ERROR_QUIET
25+
)
26+
endif()
27+
28+
# Build the version string with optional dirty flag
29+
set(GGML_VERSION "${GGML_VERSION_BASE}")
30+
if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
31+
set(GGML_VERSION "${GGML_VERSION}-dirty")
32+
endif()
33+
34+
if(NOT GGML_BUILD_COMMIT)
35+
set(GGML_BUILD_COMMIT "unknown")
36+
endif()
37+
338
include(CheckIncludeFileCXX)
439

540
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -129,18 +164,19 @@ endif()
129164
option(GGML_LASX "ggml: enable lasx" ON)
130165
option(GGML_LSX "ggml: enable lsx" ON)
131166
option(GGML_RVV "ggml: enable rvv" ON)
132-
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
167+
option(GGML_RV_ZFH "ggml: enable riscv zfh" ON)
168+
option(GGML_RV_ZVFH "ggml: enable riscv zvfh" ON)
169+
option(GGML_RV_ZICBOP "ggml: enable riscv zicbop" ON)
133170
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
134-
option(GGML_VXE "ggml: enable vxe" ON)
135-
option(GGML_NNPA "ggml: enable nnpa" OFF) # temp disabled by default, see: https://github.com/ggml-org/llama.cpp/issues/14877
171+
option(GGML_VXE "ggml: enable vxe" ${GGML_NATIVE})
136172

137173
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
138174
set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
139175
set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
140176

141177

142178
if (MINGW)
143-
set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
179+
set(GGML_WIN_VER "0xA00" CACHE STRING "ggml: Windows version")
144180
endif()
145181

146182
# ggml core
@@ -158,7 +194,6 @@ option(GGML_CUDA "ggml: use CUDA"
158194
option(GGML_MUSA "ggml: use MUSA" OFF)
159195
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
160196
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
161-
option(GGML_CUDA_F16 "ggml: use 16 bit floats for some calculations" OFF)
162197
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
163198
"ggml: max. batch size for using peer access")
164199
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)
@@ -174,8 +209,8 @@ option(GGML_HIP "ggml: use HIP"
174209
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
175210
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
176211
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
177-
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
178212
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
213+
option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF)
179214
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
180215
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
181216
option(GGML_VULKAN "ggml: use Vulkan" OFF)
@@ -187,8 +222,11 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
187222
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
188223
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
189224
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
225+
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
226+
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
227+
228+
option(GGML_ZDNN "ggml: use zDNN" OFF)
190229
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
191-
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
192230
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
193231
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
194232
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" ${GGML_METAL})
@@ -213,6 +251,8 @@ option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adr
213251
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
214252
"ggml: OpenCL API version to target")
215253

254+
option(GGML_HEXAGON "ggml: enable Hexagon backend" OFF)
255+
216256
# toolchain for vulkan-shaders-gen
217257
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
218258

@@ -299,26 +339,6 @@ endif()
299339
# Create CMake package
300340
#
301341

302-
# Generate version info based on git commit.
303-
304-
if(NOT DEFINED GGML_BUILD_NUMBER)
305-
find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH)
306-
execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD
307-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
308-
OUTPUT_VARIABLE GGML_BUILD_NUMBER
309-
OUTPUT_STRIP_TRAILING_WHITESPACE
310-
)
311-
312-
if(GGML_BUILD_NUMBER EQUAL 1)
313-
message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.")
314-
endif()
315-
316-
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
317-
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
318-
OUTPUT_VARIABLE GGML_BUILD_COMMIT
319-
OUTPUT_STRIP_TRAILING_WHITESPACE
320-
)
321-
endif()
322342

323343

324344
# Capture variables prefixed with GGML_.
@@ -347,7 +367,7 @@ set(GGML_VARIABLES_EXPANDED ${variable_set_statements})
347367

348368
# Create the CMake package and set install location.
349369

350-
set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER})
370+
set(GGML_INSTALL_VERSION ${GGML_VERSION})
351371
set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
352372
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
353373
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

ggml/cmake/ggml-config.cmake.in

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ if(NOT TARGET ggml::ggml)
106106

107107
find_library(GGML_LIBRARY ggml
108108
REQUIRED
109-
HINTS ${GGML_LIB_DIR} ${GGML_BACKEND_DIR}
109+
HINTS ${GGML_LIB_DIR}
110110
NO_CMAKE_FIND_ROOT_PATH)
111111

112112
add_library(ggml::ggml UNKNOWN IMPORTED)
@@ -125,54 +125,56 @@ if(NOT TARGET ggml::ggml)
125125
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
126126

127127
set(_ggml_all_targets "")
128-
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
129-
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
130-
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)
131-
132-
find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
133-
REQUIRED
134-
HINTS ${GGML_LIB_DIR}
135-
NO_CMAKE_FIND_ROOT_PATH)
136-
137-
message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")
138-
139-
add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
140-
set_target_properties(ggml::${_ggml_backend}
141-
PROPERTIES
142-
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
143-
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
144-
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
145-
INTERFACE_COMPILE_FEATURES c_std_90
146-
POSITION_INDEPENDENT_CODE ON)
147-
148-
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
149-
if(is_cpu_variant)
150-
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
151-
set_target_properties(ggml::${_ggml_backend}
152-
PROPERTIES
153-
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
128+
if (NOT GGML_BACKEND_DL)
129+
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
130+
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
131+
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)
154132

155-
if(GGML_CPU_INTERFACE_LINK_OPTIONS)
156-
set_target_properties(ggml::${_ggml_backend}
157-
PROPERTIES
158-
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
159-
endif()
133+
find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
134+
REQUIRED
135+
HINTS ${GGML_LIB_DIR}
136+
NO_CMAKE_FIND_ROOT_PATH)
137+
138+
message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")
160139

161-
else()
162-
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
140+
add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
163141
set_target_properties(ggml::${_ggml_backend}
164142
PROPERTIES
165-
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
143+
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
144+
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
145+
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
146+
INTERFACE_COMPILE_FEATURES c_std_90
147+
POSITION_INDEPENDENT_CODE ON)
148+
149+
string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
150+
if(is_cpu_variant)
151+
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
152+
set_target_properties(ggml::${_ggml_backend}
153+
PROPERTIES
154+
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
155+
156+
if(GGML_CPU_INTERFACE_LINK_OPTIONS)
157+
set_target_properties(ggml::${_ggml_backend}
158+
PROPERTIES
159+
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
160+
endif()
166161

167-
if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
162+
else()
163+
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
168164
set_target_properties(ggml::${_ggml_backend}
169165
PROPERTIES
170-
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
166+
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
167+
168+
if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
169+
set_target_properties(ggml::${_ggml_backend}
170+
PROPERTIES
171+
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
172+
endif()
171173
endif()
172-
endif()
173174

174-
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
175-
endforeach()
175+
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
176+
endforeach()
177+
endif()
176178

177179
list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
178180
set_target_properties(ggml::ggml

ggml/include/ggml-backend.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ extern "C" {
132132
GGML_BACKEND_DEVICE_TYPE_CPU,
133133
// GPU device using dedicated memory
134134
GGML_BACKEND_DEVICE_TYPE_GPU,
135+
// integrated GPU device using host memory
136+
GGML_BACKEND_DEVICE_TYPE_IGPU,
135137
// accelerator devices intended to be used together with the CPU backend (e.g. BLAS or AMX)
136138
GGML_BACKEND_DEVICE_TYPE_ACCEL
137139
};
@@ -150,11 +152,21 @@ extern "C" {
150152

151153
// all the device properties
152154
struct ggml_backend_dev_props {
155+
// device name
153156
const char * name;
157+
// device description
154158
const char * description;
159+
// device free memory in bytes
155160
size_t memory_free;
161+
// device total memory in bytes
156162
size_t memory_total;
163+
// device type
157164
enum ggml_backend_dev_type type;
165+
// device id
166+
// for PCI devices, this should be the PCI bus id formatted as "domain:bus:device.function" (e.g. "0000:01:00.0")
167+
// if the id is unknown, this should be NULL
168+
const char * device_id;
169+
// device capabilities
158170
struct ggml_backend_dev_caps caps;
159171
};
160172

@@ -203,6 +215,8 @@ extern "C" {
203215
// Backend registry
204216
//
205217

218+
GGML_API void ggml_backend_register(ggml_backend_reg_t reg);
219+
206220
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
207221

208222
// Backend (reg) enumeration
@@ -302,11 +316,15 @@ extern "C" {
302316
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
303317
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
304318

305-
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
319+
GGML_API ggml_backend_buffer_type_t ggml_backend_sched_get_buffer_type(ggml_backend_sched_t sched, ggml_backend_t backend);
320+
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
306321

307322
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
308323
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
309324

325+
// Split graph without allocating it
326+
GGML_API void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
327+
310328
// Allocate and compute graph on the backend scheduler
311329
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph); // returns success
312330
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);

ggml/include/ggml-cpu.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ extern "C" {
101101
GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
102102
GGML_BACKEND_API int ggml_cpu_has_vsx (void);
103103
GGML_BACKEND_API int ggml_cpu_has_vxe (void);
104-
GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
105104
GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
106105
GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
107106

@@ -135,6 +134,7 @@ extern "C" {
135134
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
136135

137136
GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
137+
GGML_BACKEND_API void ggml_cpu_fp32_to_i32 (const float *, int32_t *, int64_t);
138138
GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
139139
GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
140140
GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);

ggml/include/ggml-hexagon.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once
2+
3+
#include "ggml.h"
4+
#include "ggml-backend.h"
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
// backend API
11+
GGML_BACKEND_API ggml_backend_t ggml_backend_hexagon_init(void);
12+
13+
GGML_BACKEND_API bool ggml_backend_is_hexagon(ggml_backend_t backend);
14+
15+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_hexagon_reg(void);
16+
17+
#ifdef __cplusplus
18+
}
19+
#endif

ggml/include/ggml-metal.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,18 +39,13 @@ extern "C" {
3939
// user-code should use only these functions
4040
//
4141

42+
// TODO: remove in the future
4243
GGML_BACKEND_API ggml_backend_t ggml_backend_metal_init(void);
4344

4445
GGML_BACKEND_API bool ggml_backend_is_metal(ggml_backend_t backend);
4546

46-
GGML_DEPRECATED(
47-
GGML_BACKEND_API ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t size, size_t max_size),
48-
"obsoleted by the new device interface - https://github.com/ggml-org/llama.cpp/pull/9713");
49-
5047
GGML_BACKEND_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
5148

52-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
53-
5449
// helper to check if the device supports a specific family
5550
// ideally, the user code should be doing these checks
5651
// ref: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf

0 commit comments

Comments
 (0)