Excerpt captured from a GitHub diff view of ggml-backend.cpp (function ggml_backend_sched_compute_splits); diff line numbers and +/- markers from the web UI are interleaved with the code below.
auto queue_synchronize = [&](ggml_backend_t backend) {
1366
+
auto backend_id = ggml_backend_sched_backend_id(sched, backend);
1367
+
needs_synchronize[backend_id] = true;
1368
+
};
1364
1369
1365
1370
// copy the input tensors to the split backend
1366
1371
for (int j = 0; j < split->n_inputs; j++) {
@@ -1383,9 +1388,10 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
1383
1388
} else {
1384
1389
ggml_backend_synchronize(split_backend);
1385
1390
}
1386
-
// try async copy, but if not possible, we can still use a sync copy without synchronizing the dst backend, since we handle the synchronization here with multiple copies and events
1387
-
// TODO: add public function to facilitate this, since applications do not have direct access to the backend interface
1388
-
if (!split_backend->iface.cpy_tensor_async || !split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1391
+
if (split_backend->iface.cpy_tensor_async && split_backend->iface.cpy_tensor_async(input_backend, split_backend, input, input_cpy)) {
1392
+
// async tensor copy occurs on the source stream, queue up a synchronize after all the copies are done to ensure all inputs are ready
1393
+
queue_synchronize(input_backend);
1394
+
} else {
1389
1395
ggml_backend_synchronize(input_backend);
1390
1396
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
[End of diff excerpt — the remainder of ggml_backend_sched_compute_splits is not shown.]