Skip to content

Commit 94f9aa0

Browse files
committed
better sync
1 parent 2e3ce66 commit 94f9aa0

File tree

1 file changed

+3
-9
lines changed

1 file changed

+3
-9
lines changed

ggml/src/ggml-tp/ggml-tp.cpp

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -936,22 +936,16 @@ static void ggml_backend_tp_buffer_graph_compute_one(struct compute_thread * thr
936936
if (extra->needs_src_rejoin && pending_rejoins.size()) {
937937
rejoins++;
938938
thread->end = node_index;
939+
// synchronize self and then release peers
940+
ggml_backend_synchronize(be);
939941
release_peers(thread);
940942

941943
// wait for everyone else
942944
for (size_t i = 0; i < thread->peers->size(); i++) {
943945
ggml_backend_tp_semaphore_acquire(&thread->semaphore);
944946
}
945-
946-
if (device_index == 0) {
947-
for (size_t j = 0; j < ggml_parallel_devices.size(); j++) {
948-
auto backend = ggml_parallel_backends[j];
949-
ggml_backend_synchronize(backend);
950-
}
951-
release_peers(thread);
952-
}
953-
ggml_backend_tp_semaphore_acquire(&thread->semaphore);
954947

948+
// once all peers are done, we can rejoin the tensors
955949
for (auto & pending : pending_rejoins) {
956950
reduce_joined_tensors(device_index, pending);
957951
auto pending_extra = (ggml_tensor_parallel_extra *)pending->extra;

0 commit comments

Comments
 (0)