File tree Expand file tree Collapse file tree 1 file changed +3
-9
lines changed
Expand file tree Collapse file tree 1 file changed +3
-9
lines changed Original file line number Diff line number Diff line change @@ -936,22 +936,16 @@ static void ggml_backend_tp_buffer_graph_compute_one(struct compute_thread * thr
936936 if (extra->needs_src_rejoin && pending_rejoins.size ()) {
937937 rejoins++;
938938 thread->end = node_index;
939+ // synchronize self and then release peers
940+ ggml_backend_synchronize (be);
939941 release_peers (thread);
940942
941943 // wait for everyone else
942944 for (size_t i = 0 ; i < thread->peers ->size (); i++) {
943945 ggml_backend_tp_semaphore_acquire (&thread->semaphore );
944946 }
945-
946- if (device_index == 0 ) {
947- for (size_t j = 0 ; j < ggml_parallel_devices.size (); j++) {
948- auto backend = ggml_parallel_backends[j];
949- ggml_backend_synchronize (backend);
950- }
951- release_peers (thread);
952- }
953- ggml_backend_tp_semaphore_acquire (&thread->semaphore );
954947
948+ // once all peers are done, we can rejoin the tensors
955949 for (auto & pending : pending_rejoins) {
956950 reduce_joined_tensors (device_index, pending);
957951 auto pending_extra = (ggml_tensor_parallel_extra *)pending->extra ;
You can’t perform that action at this time.
0 commit comments