Skip to content

Commit bdf18ba

Browse files
committed
ggml : dynamic ggml_sched_max_splits based on graph_size (#9047)
* ggml : Dynamic ggml_sched_max_splits based on graph_size
* Fixed and re-added debug code for causes

Author: Nico Bosshard
1 parent 9c5c94f commit bdf18ba

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

ggml/src/ggml-backend.c

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1076,10 +1076,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
10761076
#define GGML_SCHED_MAX_BACKENDS 16
10771077
#endif
10781078

1079-
#ifndef GGML_SCHED_MAX_SPLITS
1080-
#define GGML_SCHED_MAX_SPLITS 2048
1081-
#endif
1082-
10831079
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
10841080
#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
10851081
#endif
@@ -1208,7 +1204,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
12081204
}
12091205

12101206
#if 0
1211-
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
1207+
#define GGML_SCHED_MAX_SPLITS_DEBUG 4096
1208+
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS_DEBUG*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
12121209
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
12131210
#define GET_CAUSE(node) causes[hash_id(node)]
12141211
#else
@@ -1633,7 +1630,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
16331630
sched->splits = realloc(sched->splits, sched->splits_capacity * sizeof(struct ggml_backend_sched_split));
16341631
GGML_ASSERT(sched->splits != NULL);
16351632
}
1636-
GGML_ASSERT(i_split < GGML_SCHED_MAX_SPLITS);
16371633
split = &sched->splits[i_split];
16381634
split->backend_id = node_backend_id;
16391635
split->i_start = i;
@@ -1964,13 +1960,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
19641960
sched->hv_tensor_backend_ids = malloc(sched->hash_set.size * sizeof(sched->hv_tensor_backend_ids[0]));
19651961
sched->hv_tensor_copies = malloc(sched->hash_set.size * sched->n_backends * sched->n_copies * sizeof(struct ggml_tensor *));
19661962

1967-
const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2;
1963+
const size_t ggml_sched_max_splits = graph_size; // at most there is one split for each node in the graph
1964+
const size_t nodes_size = graph_size + ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2;
19681965
sched->node_backend_ids = calloc(nodes_size, sizeof(sched->node_backend_ids[0]));
19691966
sched->leaf_backend_ids = calloc(nodes_size, sizeof(sched->leaf_backend_ids[0]));
19701967
sched->prev_node_backend_ids = calloc(nodes_size, sizeof(sched->prev_node_backend_ids[0]));
19711968
sched->prev_leaf_backend_ids = calloc(nodes_size, sizeof(sched->prev_leaf_backend_ids[0]));
19721969

1973-
sched->context_buffer_size = GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
1970+
sched->context_buffer_size = ggml_sched_max_splits*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + ggml_graph_overhead_custom(graph_size, false);
19741971
sched->context_buffer = malloc(sched->context_buffer_size);
19751972

19761973
const int initial_splits_capacity = 16;

0 commit comments

Comments
 (0)