@@ -248,7 +248,7 @@ extern "C" {
       // preferably to run on the same backend as the buffer
       ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
 
-      sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
+      sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false, true);
 
       // initialize buffers from a max size graph (optional)
       reserve_graph = build_graph(sched, max_batch_size);
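For context, the step that follows in this usage example is handing the worst-case graph back to the scheduler so it can size its buffers once; a minimal sketch of that follow-up call, assuming the sched and reserve_graph names from the hunk above:

      // allocate compute buffers for the worst-case graph up front, so later
      // evaluations of smaller graphs do not trigger reallocation
      ggml_backend_sched_reserve(sched, reserve_graph);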
@@ -289,7 +289,7 @@ extern "C" {
     typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
 
     // Initialize a backend scheduler, backends with low index are given priority over backends with high index
-    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
+    GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload);
     GGML_API void                 ggml_backend_sched_free(ggml_backend_sched_t sched);
 
     // Initialize backend buffers from a measure graph
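To show the extended signature in use, a minimal sketch of a caller constructing a scheduler with the new trailing argument; the helper name, the two-backend setup, and the reading of op_offload as "allow the scheduler to offload ops to another backend" are illustrative assumptions, not part of this change:

      #include "ggml-backend.h"

      // hypothetical helper: build a scheduler over a GPU backend and a CPU fallback
      static ggml_backend_sched_t make_sched(ggml_backend_t backend_gpu, ggml_backend_t backend_cpu) {
          // low index = high priority, so the CPU backend goes last
          ggml_backend_t backends[2] = { backend_gpu, backend_cpu };
          return ggml_backend_sched_new(
              backends,
              NULL,                    // bufts: NULL selects each backend's default buffer type
              2,                       // n_backends
              GGML_DEFAULT_GRAPH_SIZE, // graph_size
              false,                   // parallel
              true);                   // op_offload: true, matching the updated example above
      }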