@@ -1076,10 +1076,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
10761076#define GGML_SCHED_MAX_BACKENDS 16
10771077#endif
10781078
1079- #ifndef GGML_SCHED_MAX_SPLITS
1080- #define GGML_SCHED_MAX_SPLITS 2048
1081- #endif
1082-
10831079#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
10841080#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
10851081#endif
@@ -1208,7 +1204,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
12081204}
12091205
12101206#if 0
1211- static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
1207+ #define GGML_SCHED_MAX_SPLITS_DEBUG 4096
1208+ static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS_DEBUG * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
12121209#define SET_CAUSE (node , ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
12131210#define GET_CAUSE (node ) causes[hash_id(node)]
12141211#else
@@ -1633,7 +1630,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
16331630 sched -> splits = realloc (sched -> splits , sched -> splits_capacity * sizeof (struct ggml_backend_sched_split ));
16341631 GGML_ASSERT (sched -> splits != NULL );
16351632 }
1636- GGML_ASSERT (i_split < GGML_SCHED_MAX_SPLITS );
16371633 split = & sched -> splits [i_split ];
16381634 split -> backend_id = node_backend_id ;
16391635 split -> i_start = i ;
@@ -1964,13 +1960,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
19641960 sched -> hv_tensor_backend_ids = malloc (sched -> hash_set .size * sizeof (sched -> hv_tensor_backend_ids [0 ]));
19651961 sched -> hv_tensor_copies = malloc (sched -> hash_set .size * sched -> n_backends * sched -> n_copies * sizeof (struct ggml_tensor * ));
19661962
1967- const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
1963+ const size_t ggml_sched_max_splits = graph_size ; // at most there is one split for each node in the graph
1964+ const size_t nodes_size = graph_size + ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
19681965 sched -> node_backend_ids = calloc (nodes_size , sizeof (sched -> node_backend_ids [0 ]));
19691966 sched -> leaf_backend_ids = calloc (nodes_size , sizeof (sched -> leaf_backend_ids [0 ]));
19701967 sched -> prev_node_backend_ids = calloc (nodes_size , sizeof (sched -> prev_node_backend_ids [0 ]));
19711968 sched -> prev_leaf_backend_ids = calloc (nodes_size , sizeof (sched -> prev_leaf_backend_ids [0 ]));
19721969
1973- sched -> context_buffer_size = GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
1970+ sched -> context_buffer_size = ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
19741971 sched -> context_buffer = malloc (sched -> context_buffer_size );
19751972
19761973 const int initial_splits_capacity = 16 ;
0 commit comments