Merged

41 commits
9c60398
CUDA: use mma FA kernel for gqa > 4 on RTX 4000 (llama/15035)
JohannesGaessler Aug 2, 2025
2651cb7
opencl: fix adreno compiler detection logic (llama/15029)
lhez Aug 2, 2025
949131a
vulkan: Use coopmat2 for conv2d (llama/14982)
jeffbolznv Aug 3, 2025
8b87017
vulkan: fix build when using glslang that does not support coopmat2 (…
jeffbolznv Aug 4, 2025
63b6d5e
cmake: Add GGML_BACKEND_DIR option (llama/15074)
ckastner Aug 4, 2025
1c18a3a
sycl: fix mul_mat selection (llama/15092)
Rbiessy Aug 5, 2025
a561a19
llama : add gpt-oss (llama/15091)
ggerganov Aug 5, 2025
e37215a
CANN: add support for ACL Graph (llama/15065)
noemotiovon Aug 6, 2025
1667d77
ggml : fix fallback to CPU for ununsupported ops (llama/15118)
slaren Aug 6, 2025
0be191f
opencl: add `swiglu_oai` and `add_id` (llama/15121)
lhez Aug 6, 2025
06e36b3
fix profiling crash (llama/15072)
rmatif Aug 6, 2025
63806ec
CUDA: GEMM for FP32/FP16/BF16 and ne11 <= 16 (llama/15131)
JohannesGaessler Aug 7, 2025
10cf8ea
ggml: Skip backend library linking code when GGML_BACKEND_DL=ON (llam…
ckastner Aug 7, 2025
2d056bc
HIP: add cmake option to enable compiler output of kernel resource us…
IMbackK Aug 7, 2025
412c6db
vulkan: Add env var to disable host visible vidmem (llama/15109)
jeffbolznv Aug 7, 2025
2d26b6d
vulkan: support fattn sinks (llama/15126)
jeffbolznv Aug 7, 2025
54f6875
opencl: support sink in `soft_max` (attn sinks) (llama/15152)
lhez Aug 8, 2025
d654e38
CUDA: attention sinks for mma FlashAttention (llama/15157)
JohannesGaessler Aug 8, 2025
8122b79
ggml : fix field name when new ggml_backend (llama/14944)
aisk Aug 8, 2025
eab860c
gguf-py : add Numpy MXFP4 de/quantization support (llama/15111)
compilade Aug 8, 2025
44ad8cc
CUDA: add attention sinks for tile and wmma (llama/15178)
am17an Aug 9, 2025
beedc60
cuda: refactored ssm_scan and use CUB (llama/13291)
Your-Cheese Aug 9, 2025
f9070d6
kleidiai: fix unsigned overflow bug (llama/15150)
chaxu01 Aug 11, 2025
b11d639
CANN: Add broadcast for softmax and FA (llama/15208)
hipudding Aug 11, 2025
873ab5d
musa: fix failures in test-backend-ops for mul_mat_id op (llama/15236)
yeahdongcn Aug 12, 2025
3a904fd
CANN: GGML_OP_CPY optimization (llama/15070)
noemotiovon Aug 12, 2025
8e1c682
CUDA cmake: add `-lineinfo` for easier debug (llama/15260)
am17an Aug 12, 2025
6f0c19c
opencl: allow mixed f16/f32 `add` (llama/15140)
rmatif Aug 12, 2025
9969cfc
sycl: Fix and disable more configurations of mul_mat (llama/15151)
Rbiessy Aug 12, 2025
cf811e3
HIP: disable sync warp shuffel operators from clr amd_warp_sync_funct…
IMbackK Aug 12, 2025
897e390
ggml-rpc: chunk send()/recv() to avoid EINVAL for very large tensors …
Tak-RS Aug 13, 2025
fe0cee1
CUDA: Optimize `reduce_rows_f32` kernel, leading up to 25x perf impro…
ORippler Aug 13, 2025
f381597
ggml : repack block_iq4_nlx8 (llama/14904)
ggerganov Aug 13, 2025
2e2f57b
ggml : update `ggml_rope_multi` (llama/12665)
foldl Aug 13, 2025
5606bd1
sync : llama.cpp
ggerganov Aug 13, 2025
78d736a
HIP: bump requirement to rocm 6.1 (llama/15296)
IMbackK Aug 13, 2025
75e5564
finetune: SGD optimizer, more CLI args (llama/13873)
graehl Aug 14, 2025
fc8640d
cuda : fix GGML_CUDA_GRAPHS=OFF (llama/15300)
CISC Aug 14, 2025
d65b660
sync : llama.cpp
ggerganov Aug 14, 2025
28d9223
tests : remove unused includes (#0)
ggerganov Aug 14, 2025
c75e2b1
mnist : adapt to opt changes
ggerganov Aug 14, 2025
6 changes: 4 additions & 2 deletions CMakeLists.txt
@@ -39,8 +39,9 @@ if (WIN32)
set(CMAKE_SHARED_MODULE_PREFIX "")
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
option(GGML_BACKEND_DL "ggml: build backends as dynamic libraries (requires BUILD_SHARED_LIBS)" OFF)
set(GGML_BACKEND_DIR "" CACHE PATH "ggml: directory to load dynamic backends from (requires GGML_BACKEND_DL")

#
# option list
@@ -175,6 +176,7 @@ option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM"
option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF)
option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF)
option(GGML_HIP_MMQ_MFMA "ggml: enable MFMA MMA for CDNA in MMQ" ON)
option(GGML_HIP_EXPORT_METRICS "ggml: enable kernel perf metrics output" OFF)
option(GGML_MUSA_GRAPHS "ggml: use MUSA graph, experimental, unstable" OFF)
option(GGML_MUSA_MUDNN_COPY "ggml: enable muDNN for accelerated copy" OFF)
option(GGML_VULKAN "ggml: use Vulkan" OFF)
80 changes: 41 additions & 39 deletions cmake/ggml-config.cmake.in
@@ -125,54 +125,56 @@ if(NOT TARGET ggml::ggml)
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")

set(_ggml_all_targets "")
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)

find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
REQUIRED
HINTS ${GGML_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH)

message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")

add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
INTERFACE_COMPILE_FEATURES c_std_90
POSITION_INDEPENDENT_CODE ON)

string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
if(is_cpu_variant)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")
if (NOT GGML_BACKEND_DL)
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
string(TOUPPER "${_ggml_backend_pfx}" _ggml_backend_pfx)

if(GGML_CPU_INTERFACE_LINK_OPTIONS)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
endif()
find_library(${_ggml_backend_pfx}_LIBRARY ${_ggml_backend}
REQUIRED
HINTS ${GGML_LIB_DIR}
NO_CMAKE_FIND_ROOT_PATH)

message(STATUS "Found ${${_ggml_backend_pfx}_LIBRARY}")

else()
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
add_library(ggml::${_ggml_backend} UNKNOWN IMPORTED)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")
INTERFACE_INCLUDE_DIRECTORIES "${GGML_INCLUDE_DIR}"
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
IMPORTED_LOCATION "${${_ggml_backend_pfx}_LIBRARY}"
INTERFACE_COMPILE_FEATURES c_std_90
POSITION_INDEPENDENT_CODE ON)

string(REGEX MATCH "^ggml-cpu" is_cpu_variant "${_ggml_backend}")
if(is_cpu_variant)
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_LIBRARIES "${GGML_CPU_INTERFACE_LINK_LIBRARIES}")

if(GGML_CPU_INTERFACE_LINK_OPTIONS)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${GGML_CPU_INTERFACE_LINK_OPTIONS}")
endif()

if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
else()
list(APPEND ${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES "ggml::ggml-base")
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
INTERFACE_LINK_LIBRARIES "${${_ggml_backend_pfx}_INTERFACE_LINK_LIBRARIES}")

if(${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS)
set_target_properties(ggml::${_ggml_backend}
PROPERTIES
INTERFACE_LINK_OPTIONS "${${_ggml_backend_pfx}_INTERFACE_LINK_OPTIONS}")
endif()
endif()
endif()

list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()
list(APPEND _ggml_all_targets ggml::${_ggml_backend})
endforeach()
endif()

list(APPEND GGML_INTERFACE_LINK_LIBRARIES ggml::ggml-base "${_ggml_all_targets}")
set_target_properties(ggml::ggml
2 changes: 1 addition & 1 deletion examples/mnist/mnist-common.cpp
@@ -411,7 +411,7 @@ ggml_opt_result_t mnist_model_eval(mnist_model & model, ggml_opt_dataset_t datas

void mnist_model_train(mnist_model & model, ggml_opt_dataset_t dataset, const int nepoch, const float val_split) {
ggml_opt_fit(model.backend_sched, model.ctx_compute, model.images, model.logits, dataset,
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, ggml_opt_get_default_optimizer_params, nepoch, model.nbatch_logical, val_split, false);
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY, GGML_OPT_OPTIMIZER_TYPE_ADAMW, ggml_opt_get_default_optimizer_params, nepoch, model.nbatch_logical, val_split, false);
}

void mnist_model_save(mnist_model & model, const std::string & fname) {
31 changes: 25 additions & 6 deletions include/ggml-opt.h
@@ -74,16 +74,26 @@ extern "C" {
GGML_OPT_BUILD_TYPE_OPT = 30,
};

enum ggml_opt_optimizer_type {
GGML_OPT_OPTIMIZER_TYPE_ADAMW,
GGML_OPT_OPTIMIZER_TYPE_SGD,

GGML_OPT_OPTIMIZER_TYPE_COUNT
};

// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
struct ggml_opt_optimizer_params {
// AdamW optimizer parameters
struct {
float alpha; // learning rate
float beta1;
float beta2;
float beta1; // first AdamW momentum
float beta2; // second AdamW momentum
float eps; // epsilon for numerical stability
float wd; // weight decay for AdamW, use 0.0f to disable
float wd; // weight decay - 0.0f to disable
} adamw;
struct {
float alpha; // learning rate
float wd; // weight decay
} sgd;
};

// callback to calculate optimizer parameters prior to a backward pass
@@ -112,8 +112,11 @@ extern "C" {

int32_t opt_period; // after how many gradient accumulation steps an optimizer step should be done

ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
void * get_opt_pars_ud; // userdata for calculating optimizer parameters

// only GGML_OPT_OPTIMIZER_TYPE_ADAMW needs m, v momenta per parameter tensor
enum ggml_opt_optimizer_type optimizer;
};

// get parameters for an optimization context with defaults set where possible
@@ -142,6 +155,10 @@ extern "C" {
// get the gradient accumulator for a node from the forward graph
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);

GGML_API enum ggml_opt_optimizer_type ggml_opt_context_optimizer_type(ggml_opt_context_t); //TODO consistent naming scheme

GGML_API const char * ggml_opt_optimizer_name(enum ggml_opt_optimizer_type);

// ====== Optimization Result ======

GGML_API ggml_opt_result_t ggml_opt_result_init(void);
@@ -226,12 +243,14 @@ extern "C" {
struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
enum ggml_opt_loss_type loss_type, // loss to minimize
enum ggml_opt_optimizer_type optimizer, // sgd or adamw
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
int64_t nepoch, // how many times the dataset should be iterated over
int64_t nbatch_logical, // datapoints optimizer step, must be a multiple of ndata_batch in inputs/outputs
float val_split, // fraction of the dataset to use for validation, must be in [0.0f, 1.0f)
bool silent); // whether or not info prints to stderr should be suppressed


#ifdef __cplusplus
}
#endif
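
To illustrate the new optimizer selection above, here is a minimal training sketch. It is not taken from the PR: the helper names, the hyperparameter values, and the surrounding setup (scheduler, compute context, input/output tensors, dataset) are assumed to exist elsewhere, as in the examples/mnist call site; only ggml_opt_fit, ggml_opt_get_default_optimizer_params, and the struct fields come from the header.

#include "ggml-opt.h"

// Hypothetical callback: take the library defaults and override the SGD fields.
static struct ggml_opt_optimizer_params my_sgd_params(void * userdata) {
    struct ggml_opt_optimizer_params p = ggml_opt_get_default_optimizer_params(userdata);
    p.sgd.alpha = 1e-3f; // learning rate
    p.sgd.wd    = 1e-2f; // weight decay, 0.0f to disable
    return p;
}

// Hypothetical wrapper: same call shape as the mnist example, but with SGD selected.
// sched, ctx, inputs, outputs and dataset are assumed to be set up by the caller.
static void train_with_sgd(ggml_backend_sched_t sched, struct ggml_context * ctx,
                           struct ggml_tensor * inputs, struct ggml_tensor * outputs,
                           ggml_opt_dataset_t dataset) {
    ggml_opt_fit(sched, ctx, inputs, outputs, dataset,
                 GGML_OPT_LOSS_TYPE_CROSS_ENTROPY,
                 GGML_OPT_OPTIMIZER_TYPE_SGD,
                 my_sgd_params,
                 /*nepoch         =*/ 30,
                 /*nbatch_logical =*/ 500,
                 /*val_split      =*/ 0.05f,
                 /*silent         =*/ false);
}
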
68 changes: 65 additions & 3 deletions include/ggml.h
@@ -241,6 +241,8 @@
#define GGML_ROPE_TYPE_MROPE 8
#define GGML_ROPE_TYPE_VISION 24

#define GGML_MROPE_SECTIONS 4

#define GGML_UNUSED(x) (void)(x)

#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
@@ -304,6 +306,16 @@
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)

#define GGML_TENSOR_TERNARY_OP_LOCALS \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
GGML_TENSOR_LOCALS(int64_t, ne2, src2, ne) \
GGML_TENSOR_LOCALS(size_t, nb2, src2, nb) \
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)

#define GGML_TENSOR_BINARY_OP_LOCALS01 \
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
@@ -395,7 +407,8 @@ extern "C" {
// GGML_TYPE_IQ4_NL_4_4 = 36,
// GGML_TYPE_IQ4_NL_4_8 = 37,
// GGML_TYPE_IQ4_NL_8_8 = 38,
GGML_TYPE_COUNT = 39,
GGML_TYPE_MXFP4 = 39, // MXFP4 (1 block)
GGML_TYPE_COUNT = 40,
};

// precision
@@ -430,6 +443,7 @@ extern "C" {
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors
};

// available tensor operations:
@@ -438,6 +452,7 @@ extern "C" {

GGML_OP_DUP,
GGML_OP_ADD,
GGML_OP_ADD_ID,
GGML_OP_ADD1,
GGML_OP_ACC,
GGML_OP_SUB,
@@ -527,6 +542,7 @@ extern "C" {
GGML_OP_CROSS_ENTROPY_LOSS,
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
GGML_OP_OPT_STEP_ADAMW,
GGML_OP_OPT_STEP_SGD,

GGML_OP_GLU,

@@ -557,6 +573,7 @@ extern "C" {
GGML_GLU_OP_REGLU,
GGML_GLU_OP_GEGLU,
GGML_GLU_OP_SWIGLU,
GGML_GLU_OP_SWIGLU_OAI,
GGML_GLU_OP_GEGLU_ERF,
GGML_GLU_OP_GEGLU_QUICK,

@@ -831,6 +848,13 @@ extern "C" {
struct ggml_tensor * b,
enum ggml_type type);

// dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
GGML_API struct ggml_tensor * ggml_add_id(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * ids);

GGML_API struct ggml_tensor * ggml_add1(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1198,6 +1222,13 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);

GGML_API struct ggml_tensor * ggml_swiglu_oai(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
float alpha,
float limit);

// normalize along rows
GGML_API struct ggml_tensor * ggml_norm(
struct ggml_context * ctx,
@@ -1570,6 +1601,10 @@ extern "C" {
float scale,
float max_bias);

GGML_API void ggml_soft_max_add_sinks(
struct ggml_tensor * a,
struct ggml_tensor * sinks);

GGML_API struct ggml_tensor * ggml_soft_max_ext_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1628,7 +1663,7 @@ extern "C" {
struct ggml_tensor * b,
struct ggml_tensor * c,
int n_dims,
int sections[4],
int sections[GGML_MROPE_SECTIONS],
int mode,
int n_ctx_orig,
float freq_base,
@@ -1654,6 +1689,22 @@ extern "C" {
float beta_fast,
float beta_slow);

GGML_API struct ggml_tensor * ggml_rope_multi_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
struct ggml_tensor * c,
int n_dims,
int sections[GGML_MROPE_SECTIONS],
int mode,
int n_ctx_orig,
float freq_base,
float freq_scale,
float ext_factor,
float attn_factor,
float beta_fast,
float beta_slow);

GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_rope_custom(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -2052,6 +2103,10 @@ extern "C" {
GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
const struct ggml_tensor * a);

GGML_API void ggml_flash_attn_ext_add_sinks(
struct ggml_tensor * a,
struct ggml_tensor * sinks);

// TODO: needs to be adapted to ggml_flash_attn_ext
GGML_API struct ggml_tensor * ggml_flash_attn_back(
struct ggml_context * ctx,
@@ -2257,7 +2312,14 @@ extern "C" {
struct ggml_tensor * grad,
struct ggml_tensor * m,
struct ggml_tensor * v,
struct ggml_tensor * adamw_params); // parameters such a the learning rate
struct ggml_tensor * adamw_params); // parameters such as the learning rate

// stochastic gradient descent step (with weight decay)
GGML_API struct ggml_tensor * ggml_opt_step_sgd(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * grad,
struct ggml_tensor * sgd_params); // alpha, weight decay

//
// automatic differentiation
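
As a rough illustration of the new ggml_add_id operator (dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]], per the comment in the header diff above), the sketch below builds the node for a MoE-style bias add. The function name and the dimension names (n_embd, n_expert, n_expert_used, n_tokens) are made up for the example; only ggml_add_id and the tensor constructors come from the header.

#include "ggml.h"

// Hypothetical helper: add a per-expert bias row, selected by `ids`, to each activation row.
static struct ggml_tensor * build_add_id(struct ggml_context * ctx,
                                         int64_t n_embd, int64_t n_expert,
                                         int64_t n_expert_used, int64_t n_tokens) {
    // activations:             [n_embd, n_expert_used, n_tokens]
    struct ggml_tensor * a   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_expert_used, n_tokens);
    // per-expert bias rows:    [n_embd, n_expert]
    struct ggml_tensor * b   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_expert);
    // selected expert indices: [n_expert_used, n_tokens]
    struct ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_expert_used, n_tokens);

    // dst[i0, i1, i2] = a[i0, i1, i2] + b[i0, ids[i1, i2]]
    return ggml_add_id(ctx, a, b, ids);
}
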
2 changes: 1 addition & 1 deletion scripts/sync-llama.last
@@ -1 +1 @@
3303c19b1691088275ee864a823697177c94a15d
4ebd0c125b24a0d7a78b0ffc1d9567530ed8f0c4
13 changes: 12 additions & 1 deletion src/CMakeLists.txt
@@ -214,6 +214,13 @@ add_library(ggml
ggml-backend-reg.cpp)
add_library(ggml::ggml ALIAS ggml)

if (GGML_BACKEND_DIR)
if (NOT GGML_BACKEND_DL)
message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
endif()
target_compile_definitions(ggml PUBLIC GGML_BACKEND_DIR="${GGML_BACKEND_DIR}")
endif()

target_link_libraries(ggml PUBLIC ggml-base)

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
@@ -227,7 +234,11 @@ function(ggml_add_backend_library backend)
set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
add_dependencies(ggml ${backend})
install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
if (GGML_BACKEND_DIR)
install(TARGETS ${backend} LIBRARY DESTINATION ${GGML_BACKEND_DIR})
else()
install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()
else()
add_library(${backend} ${ARGN})
target_link_libraries(ggml PUBLIC ${backend})
1 change: 1 addition & 0 deletions src/ggml-alloc.c
@@ -29,6 +29,7 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
case GGML_OP_DIAG_MASK_ZERO:
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_ADD:
case GGML_OP_ADD_ID:
case GGML_OP_ADD1:
case GGML_OP_SUB:
case GGML_OP_MUL: