Skip to content

Commit dbe335a

Browse files
author
Anoop Kapoor
committed
@FIR-1063 - llama.cpp/ggml/tsavorite support for sakanaAI Model
1 parent e6d62c9 commit dbe335a

File tree

3 files changed

+46
-10
lines changed

3 files changed

+46
-10
lines changed

ggml/include/ggml-tsavorite.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ extern "C" {
5757

5858
enum ggml_tsavorite_input_tensors_count {
5959
TSAVORITE_UNARY_INPUT_TENSORS = 1,
60-
TSAVORITE_TWO_INPUT_TENSORS = 2
60+
TSAVORITE_TWO_INPUT_TENSORS = 2,
61+
TSAVORITE_IGNORE_TENSORS
6162
};
6263

6364
enum ggml_tsavorite_log_type {
@@ -141,6 +142,10 @@ enum ggml_tsavorite_kernel_type {
141142
GGML_TSAVORITE_KERNEL_TYPE_GEGLU_QUICK,
142143

143144
GGML_TSAVORITE_KERNEL_TYPE_SOFT_MAX,
145+
GGML_TSAVORITE_KERNEL_TYPE_RESHAPE,
146+
GGML_TSAVORITE_KERNEL_TYPE_VIEW,
147+
GGML_TSAVORITE_KERNEL_TYPE_PERMUTE,
148+
GGML_TSAVORITE_KERNEL_TYPE_TRANSPOSE,
144149

145150
GGML_TSAVORITE_KERNEL_TYPE_COUNT
146151
};

ggml/src/ggml-backend.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -979,10 +979,15 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
979979
int cur_backend_id = -1;
980980
for (int i = 0; i < graph->n_nodes; i++) {
981981
struct ggml_tensor * node = graph->nodes[i];
982+
int * node_backend_id = &tensor_backend_id(node);
982983
if (ggml_is_view_op(node->op)) {
984+
if(node->src[0]) {
985+
*node_backend_id = sched->n_backends -1;
986+
node_backend_id = &tensor_backend_id(node->src[0]);
987+
*node_backend_id = sched->n_backends -1;
988+
}
983989
continue;
984990
}
985-
int * node_backend_id = &tensor_backend_id(node);
986991
if (*node_backend_id != -1) {
987992
if (*node_backend_id == sched->n_backends - 1) {
988993
// skip cpu (lowest prio backend)

ggml/src/ggml-tsavorite/ggml-tsavorite.cpp

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -819,10 +819,16 @@ static bool ggml_tsavorite_supports_op(const struct ggml_backend_tsavorite_devic
819819
case GGML_OP_SQRT:
820820
case GGML_OP_SQR:
821821
case GGML_OP_SIN:
822+
case GGML_OP_RESHAPE:
823+
case GGML_OP_VIEW:
824+
case GGML_OP_PERMUTE:
825+
case GGML_OP_TRANSPOSE:
826+
822827
case GGML_OP_RMS_NORM:
823-
#ifdef GGML_TARGET_POSIX
824-
case GGML_OP_SOFT_MAX:
825-
#endif /* GGML_TARGET_POSIX */
828+
829+
#ifdef GGML_TARGET_POSIX_DEBUG
830+
case GGML_OP_SOFT_MAX:
831+
#endif /* GGML_TARGET_POSIX_DEBUG */
826832
break;
827833
case GGML_OP_GLU:
828834
{
@@ -1063,6 +1069,20 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
10631069
}
10641070
num_of_input_tensors = TSAVORITE_TWO_INPUT_TENSORS;
10651071
break;
1072+
case GGML_OP_RESHAPE:
1073+
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_RESHAPE;
1074+
num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
1075+
break;
1076+
case GGML_OP_VIEW:
1077+
kernel_type = GGML_TSAVORITE_KERNEL_TYPE_VIEW;
1078+
num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
1079+
break;
1080+
case GGML_OP_PERMUTE:
1081+
num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
1082+
break;
1083+
case GGML_OP_TRANSPOSE:
1084+
num_of_input_tensors = TSAVORITE_IGNORE_TENSORS;
1085+
break;
10661086
case GGML_OP_UNARY:
10671087
switch (ggml_get_unary_op(node)) {
10681088
case GGML_UNARY_OP_NEG:
@@ -1093,10 +1113,10 @@ static enum ggml_status ggml_tsavorite_graph_compute(ggml_backend_t backend,
10931113
return GGML_STATUS_ABORTED;
10941114
}
10951115

1096-
if (!ctx->kernels[kernel_type].pipeline ||
1116+
if ((num_of_input_tensors != TSAVORITE_IGNORE_TENSORS) && (!ctx->kernels[kernel_type].pipeline ||
10971117
(!ctx->kernels[kernel_type].pipeline->_mlir_fptr_3_input[kernel_sub_type] &&
10981118
!ctx->kernels[kernel_type].pipeline->_mlir_fptr_2_input[kernel_sub_type] &&
1099-
!ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type])) {
1119+
!ctx->kernels[kernel_type].pipeline->_mlir_fptr_1_input[kernel_sub_type]))) {
11001120
GGML_TSAVORITE_LOG_ERROR("Kernel Type %d, not supported \n", kernel_type);
11011121
return GGML_STATUS_ABORTED;
11021122
}
@@ -2128,10 +2148,16 @@ static bool ggml_backend_tsavorite_device_offload_op(ggml_backend_dev_t dev,
21282148
case GGML_OP_SQRT:
21292149
case GGML_OP_SQR:
21302150
case GGML_OP_SIN:
2151+
case GGML_OP_RESHAPE:
2152+
case GGML_OP_VIEW:
2153+
case GGML_OP_PERMUTE:
2154+
case GGML_OP_TRANSPOSE:
21312155
case GGML_OP_RMS_NORM:
2132-
#ifdef GGML_TARGET_POSIX
2133-
case GGML_OP_SOFT_MAX:
2134-
#endif /* GGML_TARGET_POSIX */
2156+
2157+
#ifdef GGML_TARGET_POSIX_DEBUG
2158+
case GGML_OP_SOFT_MAX:
2159+
#endif /* GGML_TARGET_POSIX_DEBUG */
2160+
21352161
break;
21362162
case GGML_OP_GLU:
21372163
{

0 commit comments

Comments
 (0)