Skip to content

Commit 1215dde

Browse files
authored
ggml-cpu : add RISC-V vector intrinsic support for silu and cvar operations (ggml-org#17227)
Signed-off-by: Wang Yang <[email protected]>
1 parent 0cfb191 commit 1215dde

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

ggml/src/ggml-cpu/vec.cpp

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,13 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
360 360
for (; i + 3 < n; i += 4) {
361 361
vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
362 362
}
363+
#elif defined(__riscv_v_intrinsic)
364+
for (int vl; i < n; i += vl) {
365+
vl = __riscv_vsetvl_e32m2(n - i);
366+
vfloat32m2_t vx = __riscv_vle32_v_f32m2(&x[i], vl);
367+
vfloat32m2_t vy = ggml_v_silu_m2(vx, vl);
368+
__riscv_vse32_v_f32m2(&y[i], vy, vl);
369+
}
363 370
#endif
364 371
for (; i < n; ++i) {
365 372
y[i] = ggml_silu_f32(x[i]);
@@ -460,6 +467,16 @@ ggml_float ggml_vec_cvar_f32(const int n, float * y, const float * x, const floa
460 467
val = vec_mul(val, val);
461 468
sum += (ggml_float)vec_hsum_f32x4(val);
462 469
}
470+
#elif defined(__riscv_v_intrinsic)
471+
vfloat64m1_t vsum = __riscv_vfmv_v_f_f64m1(0, 1);
472+
for (int vl; i < n; i += vl) {
473+
vl = __riscv_vsetvl_e32m2(n - i);
474+
vfloat32m2_t val = __riscv_vfsub_vf_f32m2(__riscv_vle32_v_f32m2(&x[i], vl), mean, vl);
475+
__riscv_vse32_v_f32m2(&y[i], val, vl);
476+
val = __riscv_vfmul_vv_f32m2(val, val, vl);
477+
vsum = __riscv_vfwredusum_vs_f32m2_f64m1(val, vsum, vl);
478+
}
479+
sum = (ggml_float)__riscv_vfmv_f_s_f64m1_f64(vsum);
463 480
#endif
464 481
for (; i < n; ++i) {
465 482
float val = x[i] - mean;

0 commit comments

Comments
 (0)