Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
e1df78f
Told cmake to install ggml-cpp.h as a public header file. (ggml/1126)
petterreinholdtsen Feb 26, 2025
0ad15e3
cuda/cpu: Increase support for fp16 unary operations (ggml/1125)
cmdr2 Feb 28, 2025
a34489c
cuda/vulkan: specify fp32-only support for some operations in support…
cmdr2 Feb 28, 2025
0c02f62
cuda: unary ops as float + de-duplicate (ggml/1130)
cmdr2 Mar 3, 2025
0894863
ggml-cpu: Fix build with sve (llama/12059)
MollySophia Feb 25, 2025
efe4017
vulkan: fix assertion when qy_needs_dequant (llama/12068)
jeffbolznv Feb 25, 2025
6e7f839
cmake: Fix ggml backend dependencies and installation (llama/11818)
vvuksanovic Feb 27, 2025
c270cea
vulkan: improve im2col (llama/11826)
daniandtheweb Feb 28, 2025
cd143b8
vulkan: matmul dequantization improvements (llama/12015)
netrunnereve Feb 28, 2025
a252113
CANN: Fix build error with GCC 13 (llama/11990)
hipudding Feb 28, 2025
be928d3
ggml: aarch64: implement SVE kernels for q2_k_q8_k vector dot (llama/…
Vithulep Feb 28, 2025
00b059e
CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ (llama/12098)
JohannesGaessler Feb 28, 2025
4f45b43
vulkan: add specific MMV kernels for IQ2 and IQ3 quants + optimizatio…
remyoudompheng Feb 28, 2025
2724346
ggml : upgrade init_tensor API to return a ggml_status (llama/11854)
WilliamTambellini Feb 28, 2025
85e0461
CUDA: compress mode option and default to size (llama/12029)
Green-Sky Mar 1, 2025
aa27e01
ggml-backend : keep paths in native string type when possible (llama/…
slaren Mar 2, 2025
5167979
SYCL: Move CPY kernels to a separate file and add few missing kernels…
qnixsynapse Mar 3, 2025
7914670
ggml : fix kleidiai build (llama/12159)
ag2s20150909 Mar 3, 2025
9a517d6
HIP: implement FlashAttention via rocWMMA for CDNA and RDNA3+ (llama/…
hjc4869 Mar 3, 2025
e38d29c
ggml : portability fixes for VS 2017 (llama/12150)
mgroeber9110 Mar 4, 2025
66e42fa
vulkan : sync (llama/0)
ggerganov Mar 4, 2025
ea027a4
ggml : ggml_compute_forward_concat() for arbitrary tensor type (ggml/…
vmobilis Mar 7, 2025
6979ba8
ggml : fix GGMLMetalClass ODR (llama/12200)
pminev Mar 5, 2025
c19d8eb
SYCL: Disable f16 Unary OPs as not supported by the kernels (llama/12…
qnixsynapse Mar 5, 2025
2b2c567
ggml-cpu: Faster IQ1 mul_mat_vec on AVX2 using BMI2 instructions (lla…
remyoudompheng Mar 6, 2025
dccfba4
opencl : fix profile-related errors (llama/12095)
simon886212 Mar 6, 2025
03b31a0
opencl : fix `ulong` kernel args were set from `int` variables (llama…
linehill Mar 6, 2025
c6e70f7
opencl : fix buffer alignment (llama/12197)
linehill Mar 6, 2025
8a3a93e
HIP/CUDA: set the parameter value in maintain_cuda_graph instead of …
IMbackK Mar 6, 2025
425cefd
CUDA: fix FA logic for PTX 7.0 and CC >= 7.5 (llama/12222)
JohannesGaessler Mar 6, 2025
f755166
cmake : fix undefined reference errors for std::filesystem in ggml (#…
hbuxiaofei Mar 6, 2025
dbf9384
opencl: Noncontiguous `norm`, `rms_norm`, disable `fp16` for some ops…
lhez Mar 7, 2025
41079a9
metal : fix default.metallib build (llama/12224)
danbev Mar 7, 2025
5141a2c
metal : simplify kernel arguments using a struct (ggml/3229) (llama/1…
BB-fat Mar 7, 2025
16754cd
ggml-cpu: faster AVX2 variant for IQ1_M (llama/12216)
remyoudompheng Mar 7, 2025
0b4956d
sync : ggml
ggerganov Mar 8, 2025
ed5c494
cmake : fix ggml-config (ggml/0)
ggerganov Mar 8, 2025
209e1f3
objc : fix build, tmp remove GPU support, use C++17
ggerganov Mar 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions examples/whisper.objc/whisper.objc.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

/* Begin PBXBuildFile section */
1844471A2AB211A2007D6BFE /* ggml-alloc.c in Sources */ = {isa = PBXBuildFile; fileRef = 184447182AB211A2007D6BFE /* ggml-alloc.c */; };
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */ = {isa = PBXBuildFile; fileRef = 1844471B2AB21655007D6BFE /* ggml-metal.m */; settings = {COMPILER_FLAGS = "-framework Foundation -framework Metal -framework MetalKit -fno-objc-arc"; }; };
18627C7B29052BDF00BD2A04 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7A29052BDF00BD2A04 /* AppDelegate.m */; };
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C7D29052BDF00BD2A04 /* SceneDelegate.m */; };
18627C8129052BDF00BD2A04 /* ViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 18627C8029052BDF00BD2A04 /* ViewController.m */; };
Expand All @@ -19,8 +18,6 @@
18627C9429052C4900BD2A04 /* whisper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9329052C4900BD2A04 /* whisper.cpp */; settings = {COMPILER_FLAGS = "-DWHISPER_USE_COREML -DWHISPER_COREML_ALLOW_FALLBACK -DGGML_USE_METAL"; }; };
18627C9629052C5800BD2A04 /* ggml.c in Sources */ = {isa = PBXBuildFile; fileRef = 18627C9529052C5800BD2A04 /* ggml.c */; settings = {COMPILER_FLAGS = "-DGGML_USE_ACCELERATE -DGGML_USE_METAL"; }; };
18627C9B29052CFF00BD2A04 /* ggml-base.en.bin in Resources */ = {isa = PBXBuildFile; fileRef = 18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */; };
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */ = {isa = PBXBuildFile; fileRef = 1844471D2AB2195F007D6BFE /* ggml-metal.metal */; };
18ABE15A2AF556340044A204 /* ggml-backend.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1572AF556340044A204 /* ggml-backend.cpp */; };
18ABE15B2AF556340044A204 /* ggml-quants.c in Sources */ = {isa = PBXBuildFile; fileRef = 18ABE1592AF556340044A204 /* ggml-quants.c */; };
18E864A92CE73C1E0094B8B3 /* ggml-cpu.c in Sources */ = {isa = PBXBuildFile; fileRef = 18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */; };
Expand All @@ -44,7 +41,6 @@
dstPath = "";
dstSubfolderSpec = 7;
files = (
18A276062C2A98A5001C8D37 /* ggml-metal.metal in Copy Files */,
);
name = "Copy Files";
runOnlyForDeploymentPostprocessing = 0;
Expand All @@ -54,8 +50,6 @@
/* Begin PBXFileReference section */
184447182AB211A2007D6BFE /* ggml-alloc.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = "ggml-alloc.c"; path = "../../../ggml/src/ggml-alloc.c"; sourceTree = "<group>"; };
184447192AB211A2007D6BFE /* ggml-alloc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-alloc.h"; path = "../../../ggml/include/ggml-alloc.h"; sourceTree = "<group>"; };
1844471B2AB21655007D6BFE /* ggml-metal.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = "ggml-metal.m"; path = "../../../ggml/src/ggml-metal/ggml-metal.m"; sourceTree = "<group>"; };
1844471D2AB2195F007D6BFE /* ggml-metal.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; name = "ggml-metal.metal"; path = "../../../ggml/src/ggml-metal/ggml-metal.metal"; sourceTree = "<group>"; };
18627C7629052BDF00BD2A04 /* whisper.objc.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = whisper.objc.app; sourceTree = BUILT_PRODUCTS_DIR; };
18627C7929052BDF00BD2A04 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = "<group>"; };
18627C7A29052BDF00BD2A04 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = "<group>"; };
Expand All @@ -73,7 +67,6 @@
18627C9529052C5800BD2A04 /* ggml.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = ggml.c; path = ../../../ggml/src/ggml.c; sourceTree = "<group>"; };
18627C9729052C6600BD2A04 /* ggml.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ggml.h; path = ../../../ggml/include/ggml.h; sourceTree = "<group>"; };
18627C9A29052CFF00BD2A04 /* ggml-base.en.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = "ggml-base.en.bin"; path = "../../../models/ggml-base.en.bin"; sourceTree = "<group>"; };
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-metal.h"; path = "../../../ggml/include/ggml-metal.h"; sourceTree = "<group>"; };
18A275FF2C2A9563001C8D37 /* ggml-common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-common.h"; path = "../../../ggml/src/ggml-common.h"; sourceTree = "<group>"; };
18ABE1542AF556340044A204 /* ggml-quants.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-quants.h"; path = "../../../ggml/src/ggml-quants.h"; sourceTree = "<group>"; };
18ABE1552AF556340044A204 /* ggml-backend.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "ggml-backend.h"; path = "../../../ggml/include/ggml-backend.h"; sourceTree = "<group>"; };
Expand Down Expand Up @@ -153,15 +146,12 @@
18E864AA2CE73C580094B8B3 /* ggml-cpu.h */,
18E864A82CE73C1E0094B8B3 /* ggml-cpu.c */,
18A275FF2C2A9563001C8D37 /* ggml-common.h */,
18A275FE2C2A94DE001C8D37 /* ggml-metal.h */,
18ABE1562AF556340044A204 /* ggml-backend-impl.h */,
18ABE1572AF556340044A204 /* ggml-backend.cpp */,
18ABE1552AF556340044A204 /* ggml-backend.h */,
18ABE1582AF556340044A204 /* ggml-impl.h */,
18ABE1592AF556340044A204 /* ggml-quants.c */,
18ABE1542AF556340044A204 /* ggml-quants.h */,
1844471D2AB2195F007D6BFE /* ggml-metal.metal */,
1844471B2AB21655007D6BFE /* ggml-metal.m */,
184447182AB211A2007D6BFE /* ggml-alloc.c */,
184447192AB211A2007D6BFE /* ggml-alloc.h */,
7FE3424E2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc */,
Expand Down Expand Up @@ -258,7 +248,6 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
18A2760B2C2A9B43001C8D37 /* ggml-metal.metal in Resources */,
18627C8929052BE000BD2A04 /* LaunchScreen.storyboard in Resources */,
7FE3424F2A0C418A0015A058 /* ggml-base.en-encoder.mlmodelc in Resources */,
18627C8629052BE000BD2A04 /* Assets.xcassets in Resources */,
Expand Down Expand Up @@ -293,7 +282,6 @@
18627C7E29052BDF00BD2A04 /* SceneDelegate.m in Sources */,
433188B82D3A187C00E3FE79 /* gguf.cpp in Sources */,
18F8C0BC2CEDF4DC00CAD607 /* ggml-threading.cpp in Sources */,
1844471C2AB21655007D6BFE /* ggml-metal.m in Sources */,
7FE3424B2A0C3FA20015A058 /* whisper-encoder-impl.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down Expand Up @@ -443,6 +431,7 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
Expand Down Expand Up @@ -473,6 +462,7 @@
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CLANG_CXX_LANGUAGE_STANDARD = "c++17";
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_TEAM = P8JZH34X63;
Expand Down
9 changes: 8 additions & 1 deletion ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable
option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
option(GGML_AVX_VNNI "ggml: enable AVX-VNNI" OFF)
option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})
option(GGML_BMI2 "ggml: enable BMI2" ${INS_ENB})
option(GGML_AVX512 "ggml: enable AVX512F" OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
Expand Down Expand Up @@ -155,10 +156,14 @@ option(GGML_CUDA_NO_VMM "ggml: do not try to use CUDA VMM"
option(GGML_CUDA_FA "ggml: compile ggml FlashAttention CUDA kernels" ON)
option(GGML_CUDA_FA_ALL_QUANTS "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_GRAPHS "ggml: use CUDA graphs (llama.cpp only)" ${GGML_CUDA_GRAPHS_DEFAULT})
set (GGML_CUDA_COMPRESSION_MODE "size" CACHE STRING
"ggml: cuda link binary compression mode; requires cuda 12.8+")
set_property(CACHE GGML_CUDA_COMPRESSION_MODE PROPERTY STRINGS "none;speed;balance;size")

option(GGML_HIP "ggml: use HIP" OFF)
option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF)
option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON)
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
option(GGML_HIP_UMA "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF)
Expand Down Expand Up @@ -212,6 +217,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)

include(GNUInstallDirs)

#
# build the library
#
Expand All @@ -235,7 +242,6 @@ endif ()
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

# all public headers
Expand All @@ -246,6 +252,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-backend.h
include/ggml-blas.h
include/ggml-cann.h
include/ggml-cpp.h
include/ggml-cuda.h
include/ggml-kompute.h
include/ggml-opt.h
Expand Down
9 changes: 7 additions & 2 deletions ggml/cmake/ggml-config.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})

string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
if(is_cpu_variant)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
Expand All @@ -124,7 +124,7 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
endif()

else()
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml" "ggml::ggml-base")
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
Expand All @@ -139,6 +139,11 @@ foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()

list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
set_target_properties(ggml::ggml
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_INTERFACE_LINK_LIBRARIES}")

add_library(ggml::all INTERFACE IMPORTED)
set_target_properties(ggml::all
PROPERTIES
Expand Down
2 changes: 1 addition & 1 deletion ggml/include/ggml-alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct ggml_tallocr {
};

GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
GGML_API enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);

// Graph allocator
/*
Expand Down
6 changes: 3 additions & 3 deletions ggml/include/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ extern "C" {
GGML_API void ggml_backend_buffer_free (ggml_backend_buffer_t buffer);
GGML_API void * ggml_backend_buffer_get_base (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_size (ggml_backend_buffer_t buffer);
GGML_API void ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API enum ggml_status ggml_backend_buffer_init_tensor (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_max_size (ggml_backend_buffer_t buffer);
GGML_API size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
Expand Down Expand Up @@ -342,8 +342,8 @@ extern "C" {
GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);

// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);
GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API enum ggml_status ggml_backend_view_init(struct ggml_tensor * tensor);

// CPU buffer types are always available
GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
Expand Down
1 change: 1 addition & 0 deletions ggml/include/ggml-cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ extern "C" {
GGML_BACKEND_API int ggml_cpu_has_avx (void);
GGML_BACKEND_API int ggml_cpu_has_avx_vnni (void);
GGML_BACKEND_API int ggml_cpu_has_avx2 (void);
GGML_BACKEND_API int ggml_cpu_has_bmi2 (void);
GGML_BACKEND_API int ggml_cpu_has_f16c (void);
GGML_BACKEND_API int ggml_cpu_has_fma (void);
GGML_BACKEND_API int ggml_cpu_has_avx512 (void);
Expand Down
6 changes: 5 additions & 1 deletion ggml/include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -2140,7 +2140,11 @@ extern "C" {
# define GGML_RESTRICT
# endif
#else
# define GGML_RESTRICT restrict
# if defined (_MSC_VER) && (__STDC_VERSION__ < 201112L)
# define GGML_RESTRICT __restrict
# else
# define GGML_RESTRICT restrict
# endif
#endif
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
Expand Down
14 changes: 7 additions & 7 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ add_library(ggml
target_link_libraries(ggml PUBLIC ggml-base)

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_libraries(ggml PRIVATE dl)
target_link_libraries(ggml PRIVATE dl stdc++fs)
endif()

function(ggml_add_backend_library backend)
Expand Down Expand Up @@ -289,7 +289,7 @@ function(ggml_add_cpu_backend_variant tag_name)
set(GGML_CPU_TAG_NAME ${tag_name})
# other: OPENMP LLAMAFILE CPU_HBM
foreach (feat NATIVE
AVX AVX2 AVX_VNNI FMA F16C
AVX AVX2 BMI2 AVX_VNNI FMA F16C
AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
AMX_TILE AMX_INT8 AMX_BF16)
set(GGML_${feat} OFF)
Expand All @@ -309,13 +309,13 @@ if (GGML_CPU_ALL_VARIANTS)
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
endif()
ggml_add_cpu_backend_variant(sandybridge AVX)
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 FMA AVX_VNNI)
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 BMI2 FMA)
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 BMI2 FMA AVX512)
ggml_add_cpu_backend_variant(icelake AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
ggml_add_cpu_backend_variant(alderlake AVX F16C AVX2 BMI2 FMA AVX_VNNI)
if (NOT MSVC)
# MSVC doesn't support AMX
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
ggml_add_cpu_backend_variant(sapphirerapids AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
endif()
elseif (GGML_CPU)
ggml_add_cpu_backend_variant_impl("")
Expand Down
39 changes: 24 additions & 15 deletions ggml/src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
return talloc;
}

void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
enum ggml_status ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
size = GGML_PAD(size, talloc->alignment);

Expand All @@ -104,7 +104,7 @@ void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tenso

assert(((uintptr_t)addr % talloc->alignment) == 0);

ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
return ggml_backend_tensor_alloc(talloc->buffer, tensor, addr);
}

// dynamic tensor allocator
Expand Down Expand Up @@ -933,42 +933,51 @@ size_t ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) {

// utils

// Release every backend buffer stored in the array, then the array itself.
//
// buffers   - address of the caller's heap-allocated array of buffer handles
// n_buffers - address of the number of valid entries in *buffers (read only)
//
// On return *buffers is NULL, so the caller is not left holding a dangling
// pointer to the freed array. The count is const-qualified and therefore left
// untouched; callers must not use the array again after this call.
static void free_buffers(ggml_backend_buffer_t ** buffers, const size_t * n_buffers) {
    for (size_t i = 0; i < *n_buffers; i++) {
        ggml_backend_buffer_free((*buffers)[i]);
    }
    free(*buffers);
    *buffers = NULL; // avoid a dangling pointer / accidental double free of the array
}

static bool alloc_tensor_range(struct ggml_context * ctx,
struct ggml_tensor * first, struct ggml_tensor * last,
ggml_backend_buffer_type_t buft, size_t size,
ggml_backend_buffer_t ** buffers, size_t * n_buffers) {

ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
if (buffer == NULL) {
#ifndef NDEBUG
GGML_LOG_DEBUG("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
#endif
for (size_t i = 0; i < *n_buffers; i++) {
ggml_backend_buffer_free((*buffers)[i]);
}
free(*buffers);
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
free_buffers(buffers, n_buffers);
return false;
}

*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;

struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);

for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
enum ggml_status status = GGML_STATUS_SUCCESS;
if (t->data == NULL) {
if (t->view_src == NULL) {
ggml_tallocr_alloc(&tallocr, t);
status = ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
ggml_backend_view_init(t);
status = ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
ggml_backend_view_init(t);
status = ggml_backend_view_init(t);
}
}
if (status != GGML_STATUS_SUCCESS) {
GGML_LOG_ERROR("%s: failed to initialize tensor %s\n", __func__, t->name);
free_buffers(buffers, n_buffers);
return false;
}
}

*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
(*buffers)[(*n_buffers)++] = buffer;

return true;
}

Expand Down
Loading
Loading