@@ -1018,10 +1018,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
10181018#define GGML_SCHED_MAX_BACKENDS 16
10191019#endif
10201020
1021- #ifndef GGML_SCHED_MAX_SPLITS
1022- #define GGML_SCHED_MAX_SPLITS 2048
1023- #endif
1024-
10251021#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
10261022#define GGML_SCHED_MAX_SPLIT_INPUTS GGML_MAX_SRC
10271023#endif
@@ -1125,7 +1121,8 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, co
11251121}
11261122
11271123#if 0
1128- static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
1124+ #define GGML_SCHED_MAX_SPLITS_DEBUG 4096
1125+ static char causes [GGML_DEFAULT_GRAPH_SIZE * 16 + GGML_SCHED_MAX_SPLITS_DEBUG * GGML_SCHED_MAX_SPLIT_INPUTS ][128 ]; // debug only
11291126#define SET_CAUSE (node , ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
11301127#define GET_CAUSE (node ) causes[hash_id(node)]
11311128#else
@@ -1549,7 +1546,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
15491546 sched -> splits = realloc (sched -> splits , sched -> splits_capacity * sizeof (struct ggml_backend_sched_split ));
15501547 GGML_ASSERT (sched -> splits != NULL );
15511548 }
1552- GGML_ASSERT (i_split < GGML_SCHED_MAX_SPLITS );
15531549 split = & sched -> splits [i_split ];
15541550 split -> backend_id = node_backend_id ;
15551551 split -> i_start = i ;
@@ -1865,13 +1861,14 @@ ggml_backend_sched_t ggml_backend_sched_new(
18651861 sched -> hv_tensor_backend_ids = malloc (sched -> hash_set .size * sizeof (sched -> hv_tensor_backend_ids [0 ]));
18661862 sched -> hv_tensor_copies = malloc (sched -> hash_set .size * sched -> n_backends * sched -> n_copies * sizeof (struct ggml_tensor * ));
18671863
1868- const size_t nodes_size = graph_size + GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
1864+ const size_t ggml_sched_max_splits = graph_size ; // there is at most one split per node in the graph
1865+ const size_t nodes_size = graph_size + ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 ;
18691866 sched -> node_backend_ids = calloc (nodes_size , sizeof (sched -> node_backend_ids [0 ]));
18701867 sched -> leaf_backend_ids = calloc (nodes_size , sizeof (sched -> leaf_backend_ids [0 ]));
18711868 sched -> prev_node_backend_ids = calloc (nodes_size , sizeof (sched -> prev_node_backend_ids [0 ]));
18721869 sched -> prev_leaf_backend_ids = calloc (nodes_size , sizeof (sched -> prev_leaf_backend_ids [0 ]));
18731870
1874- sched -> context_buffer_size = GGML_SCHED_MAX_SPLITS * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
1871+ sched -> context_buffer_size = ggml_sched_max_splits * GGML_SCHED_MAX_SPLIT_INPUTS * 2 * sizeof (struct ggml_tensor ) + ggml_graph_overhead_custom (graph_size , false);
18751872 sched -> context_buffer = malloc (sched -> context_buffer_size );
18761873
18771874 const int initial_splits_capacity = 16 ;
0 commit comments