|
1 | 1 | #include "vec.h" |
| 2 | +#include <riscv_vector.h> |
2 | 3 |
|
3 | 4 | #include <cassert> |
4 | 5 |
|
@@ -85,15 +86,21 @@ void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * G |
85 | 86 | // reduce sum1,sum2 to sum1 |
86 | 87 | GGML_F32_VEC_REDUCE(sumf, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8); |
87 | 88 | #elif defined(__riscv_v_intrinsic) |
88 | | - vfloat32m1_t vsum = __riscv_vfmv_v_f_f32m1(0.0f, 1); |
89 | | - for (int i = 0, avl; i < n; i += avl) { |
90 | | - avl = __riscv_vsetvl_e32m8(n - i); |
91 | | - vfloat32m8_t ax = __riscv_vle32_v_f32m8(&x[i], avl); |
92 | | - vfloat32m8_t ay = __riscv_vle32_v_f32m8(&y[i], avl); |
93 | | - vfloat32m8_t prod = __riscv_vfmul_vv_f32m8(ax, ay, avl); |
94 | | - vsum = __riscv_vfredusum_vs_f32m8_f32m1(prod, vsum, avl); |
| 89 | + int vl = __riscv_vsetvlmax_e32m8(); |
| 90 | + vfloat32m1_t vs = __riscv_vfmv_v_f_f32m1(0.0f, 1); |
| 91 | + vfloat32m8_t vsum; |
| 92 | + vfloat32m8_t ax; |
| 93 | + vfloat32m8_t ay; |
| 94 | + vsum = __riscv_vfmv_v_f_f32m8_tu(vsum, 0.0f, vl); |
| 95 | + for (int i = 0; i < n; i += vl) { |
| 96 | + vl = __riscv_vsetvl_e32m8(n - i); |
| 97 | + ax = __riscv_vle32_v_f32m8_tu(ax, &x[i], vl); |
| 98 | + ay = __riscv_vle32_v_f32m8_tu(ay, &y[i], vl); |
| 99 | + vsum = __riscv_vfmacc_vv_f32m8_tu(vsum, ax, ay, vl); |
95 | 100 | } |
96 | | - sumf += __riscv_vfmv_f_s_f32m1_f32(vsum); |
| 101 | + vl = __riscv_vsetvlmax_e32m8(); |
| 102 | + vs = __riscv_vfredusum_vs_f32m8_f32m1(vsum, vs, vl); |
| 103 | + sumf += __riscv_vfmv_f_s_f32m1_f32(vs); |
97 | 104 | #else |
98 | 105 | const int np = (n & ~(GGML_F32_STEP - 1)); |
99 | 106 |
|
|
0 commit comments