Skip to content

Commit e41e802

Browse files
committed
Multi-threads for PIM calculation.
1 parent 5c2859b commit e41e802

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

ggml/src/ggml.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12624,10 +12624,14 @@ UseGgmlGemm1:;
1262412624
}
1262512625

1262612626
#ifdef PIM_KERNEL
12627-
if ((dst->flags & GGML_TENSOR_FLAG_PIM)) {
12627+
if ((dst->flags & GGML_TENSOR_FLAG_PIM)) {
1262812628
#if PIM_DEBUG_PERF_PRINT
12629-
uint64_t t_start = get_time_us();
12629+
uint64_t t_start = get_time_us();
1263012630
#endif
12631+
if (ith > 0) {
12632+
// for non-master thread, exit directly
12633+
return;
12634+
}
1263112635
dpu_launch_gemv_async(src1, wdata, src0, dst, dst->layerid);
1263212636
dpu_kernel_barrier(*(dst->dpu_set));
1263312637
#if PIM_DEBUG_PERF_PRINT
@@ -12649,7 +12653,8 @@ UseGgmlGemm1:;
1264912653
pim_res->data = malloc(ggml_nbytes(pim_res));
1265012654
GGML_ASSERT(pim_res->data != NULL);
1265112655
dpu_get_gemv_res(src1, src0, pim_res);
12652-
#endif
12656+
12657+
#endif // TENSOR_EXPORT
1265312658
#if PIM_DEBUG_PERF_PRINT
1265412659
uint64_t tt_start = get_time_us();
1265512660
#endif
@@ -12664,7 +12669,7 @@ UseGgmlGemm1:;
1266412669
}
1266512670
#else
1266612671
return;
12667-
#endif
12672+
#endif // TENSOR_EXPORT
1266812673
}
1266912674
#if TENSOR_EXPORT
1267012675
if (to_export && !exported) {

0 commit comments

Comments
 (0)