Skip to content

Commit cd2cf74

Browse files
committed
Merge branch 'parallel' into wip
2 parents e0206e5 + cfbaccd commit cd2cf74

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

ggml/src/ggml-tp/ggml-tp.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,10 @@ static ggml_status reduce_gathered_tensors(ggml_cgraph * backend_graph, int devi
893893
auto be = ggml_parallel_backends[device_index];
894894
ggml_tensor * wrapped = extra->tensors[device_index];
895895

896+
// When reducing a tensor, the actual op (sub or add) is contained in reduce_op_tensors
897+
// which needs a split view of the reduce state sources.
898+
// The final reduce (add) is contained in tensors.
899+
// TODO: make this part of the graph.
896900
for (size_t i = 0; i < ggml_parallel_devices.size(); i++) {
897901
if (i == 0) {
898902
wrapped->src[0] = extra->rejoined_tensor_views[device_index][i++];

0 commit comments

Comments
 (0)