Skip to content

Commit e68aa10

Browse files
authored
vulkan: sort graph to allow more parallel execution (ggml-org#15850)
* vulkan: sort graph to allow more parallel execution

  Add a backend proc to allow the backend to modify the graph. The Vulkan implementation looks at which nodes depend on each other and greedily reorders them to group together nodes that don't depend on each other. It only reorders the nodes; it doesn't change the contents of any of them. With ggml-org#15489, this reduces the number of synchronizations needed.

* call optimize_graph per-split
1 parent 0a16bf5 commit e68aa10

File tree

13 files changed

+154
-0
lines changed

13 files changed

+154
-0
lines changed

ggml/src/ggml-backend-impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ extern "C" {
114114
void (*event_record)(ggml_backend_t backend, ggml_backend_event_t event);
115115
// wait for an event on a different stream
116116
void (*event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
117+
118+
// (optional) sort/optimize the nodes in the graph
119+
void (*optimize_graph) (ggml_backend_t backend, struct ggml_cgraph * cgraph);
117120
};
118121

119122
struct ggml_backend {

ggml/src/ggml-backend.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,13 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
463463
backend->iface.event_wait(backend, event);
464464
}
465465

466+
static void ggml_backend_optimize_graph(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
467+
GGML_ASSERT(backend);
468+
if (backend->iface.optimize_graph != NULL) {
469+
backend->iface.optimize_graph(backend, cgraph);
470+
}
471+
}
472+
466473
// Backend device
467474

468475
const char * ggml_backend_dev_name(ggml_backend_dev_t device) {
@@ -1298,6 +1305,10 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
12981305
struct ggml_backend_sched_split * split = &sched->splits[i];
12991306
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
13001307

1308+
// Optimize this split of the graph. This needs to happen before we make graph_copy,
1309+
// so they are in sync.
1310+
ggml_backend_optimize_graph(sched->backends[split->backend_id], &split->graph);
1311+
13011312
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
13021313
for (int j = 0; j < split->n_inputs; j++) {
13031314
assert(graph_copy->size > (graph_copy->n_nodes + 1));

ggml/src/ggml-blas/ggml-blas.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ static struct ggml_backend_i blas_backend_i = {
270270
/* .graph_compute = */ ggml_backend_blas_graph_compute,
271271
/* .event_record = */ NULL,
272272
/* .event_wait = */ NULL,
273+
/* .optimize_graph = */ NULL,
273274
};
274275

275276
static ggml_guid_t ggml_backend_blas_guid(void) {

ggml/src/ggml-cann/ggml-cann.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2690,6 +2690,7 @@ static const ggml_backend_i ggml_backend_cann_interface = {
26902690
/* .graph_compute = */ ggml_backend_cann_graph_compute,
26912691
/* .event_record = */ ggml_backend_cann_event_record,
26922692
/* .event_wait = */ ggml_backend_cann_event_wait,
2693+
/* .optimize_graph = */ NULL,
26932694
};
26942695

26952696
/**

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ static const struct ggml_backend_i ggml_backend_cpu_i = {
190190
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
191191
/* .event_record = */ NULL,
192192
/* .event_wait = */ NULL,
193+
/* .optimize_graph = */ NULL,
193194
};
194195

195196
static ggml_guid_t ggml_backend_cpu_guid(void) {

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3135,6 +3135,7 @@ static const ggml_backend_i ggml_backend_cuda_interface = {
31353135
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
31363136
/* .event_record = */ ggml_backend_cuda_event_record,
31373137
/* .event_wait = */ ggml_backend_cuda_event_wait,
3138+
/* .optimize_graph = */ NULL,
31383139
};
31393140

31403141
static ggml_guid_t ggml_backend_cuda_guid() {

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6275,6 +6275,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
62756275
/* .graph_compute = */ ggml_backend_metal_graph_compute,
62766276
/* .event_record = */ NULL,
62776277
/* .event_wait = */ NULL,
6278+
/* .optimize_graph = */ NULL,
62786279
};
62796280

62806281
static ggml_guid_t ggml_backend_metal_guid(void) {

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2838,6 +2838,7 @@ static ggml_backend_i ggml_backend_opencl_i = {
28382838
/* .graph_compute = */ ggml_backend_opencl_graph_compute,
28392839
/* .event_record = */ NULL,
28402840
/* .event_wait = */ NULL,
2841+
/* .optimize_graph = */ NULL,
28412842
};
28422843

28432844
ggml_backend_t ggml_backend_opencl_init(void) {

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,7 @@ static ggml_backend_i ggml_backend_rpc_interface = {
795795
/* .graph_compute = */ ggml_backend_rpc_graph_compute,
796796
/* .event_record = */ NULL,
797797
/* .event_wait = */ NULL,
798+
/* .optimize_graph = */ NULL,
798799
};
799800

800801
ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const char * endpoint) {

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4063,6 +4063,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
40634063
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
40644064
/* .event_record = */ ggml_backend_sycl_event_record,
40654065
/* .event_wait = */ ggml_backend_sycl_event_wait,
4066+
/* .optimize_graph = */ NULL,
40664067
};
40674068

40684069
static ggml_guid_t ggml_backend_sycl_guid() {

0 commit comments

Comments
 (0)