Skip to content

Commit c7786e7

Browse files
committed
ggml-cpu : optimize rvv ggml_vec_dot_f32
1 parent 2749662 commit c7786e7

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

ggml/src/ggml-cpu/vec.cpp

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "vec.h"
2+
#include <riscv_vector.h>
23

34
#include <cassert>
45

@@ -85,15 +86,21 @@ void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * G
8586
// reduce sum1,sum2 to sum1
8687
GGML_F32_VEC_REDUCE(sumf, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8);
8788
#elif defined(__riscv_v_intrinsic)
88-
vfloat32m1_t vsum = __riscv_vfmv_v_f_f32m1(0.0f, 1);
89-
for (int i = 0, avl; i < n; i += avl) {
90-
avl = __riscv_vsetvl_e32m8(n - i);
91-
vfloat32m8_t ax = __riscv_vle32_v_f32m8(&x[i], avl);
92-
vfloat32m8_t ay = __riscv_vle32_v_f32m8(&y[i], avl);
93-
vfloat32m8_t prod = __riscv_vfmul_vv_f32m8(ax, ay, avl);
94-
vsum = __riscv_vfredusum_vs_f32m8_f32m1(prod, vsum, avl);
89+
int vl = __riscv_vsetvlmax_e32m8();
90+
vfloat32m1_t vs = __riscv_vfmv_v_f_f32m1(0.0f, 1);
91+
vfloat32m8_t vsum;
92+
vfloat32m8_t ax;
93+
vfloat32m8_t ay;
94+
vsum = __riscv_vfmv_v_f_f32m8_tu(vsum, 0.0f, vl);
95+
for (int i = 0; i < n; i += vl) {
96+
vl = __riscv_vsetvl_e32m8(n - i);
97+
ax = __riscv_vle32_v_f32m8_tu(ax, &x[i], vl);
98+
ay = __riscv_vle32_v_f32m8_tu(ay, &y[i], vl);
99+
vsum = __riscv_vfmacc_vv_f32m8_tu(vsum, ax, ay, vl);
95100
}
96-
sumf += __riscv_vfmv_f_s_f32m1_f32(vsum);
101+
vl = __riscv_vsetvlmax_e32m8();
102+
vs = __riscv_vfredusum_vs_f32m8_f32m1(vsum, vs, vl);
103+
sumf += __riscv_vfmv_f_s_f32m1_f32(vs);
97104
#else
98105
const int np = (n & ~(GGML_F32_STEP - 1));
99106

0 commit comments

Comments (0)