@@ -360,6 +360,13 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
360360 for (; i + 3 < n; i += 4 ) {
361361 vst1q_f32 (y + i, ggml_v_silu (vld1q_f32 (x + i)));
362362 }
363+ #elif defined(__riscv_v_intrinsic)
364+ for (int vl; i < n; i += vl) {
365+ vl = __riscv_vsetvl_e32m2 (n - i);
366+ vfloat32m2_t vx = __riscv_vle32_v_f32m2 (&x[i], vl);
367+ vfloat32m2_t vy = ggml_v_silu_m2 (vx, vl);
368+ __riscv_vse32_v_f32m2 (&y[i], vy, vl);
369+ }
363370#endif
364371 for (; i < n; ++i) {
365372 y[i] = ggml_silu_f32 (x[i]);
@@ -460,6 +467,16 @@ ggml_float ggml_vec_cvar_f32(const int n, float * y, const float * x, const floa
460467 val = vec_mul (val, val);
461468 sum += (ggml_float)vec_hsum_f32x4 (val);
462469 }
470+ #elif defined(__riscv_v_intrinsic)
471+ vfloat64m1_t vsum = __riscv_vfmv_v_f_f64m1 (0 , 1 );
472+ for (int vl; i < n; i += vl) {
473+ vl = __riscv_vsetvl_e32m2 (n - i);
474+ vfloat32m2_t val = __riscv_vfsub_vf_f32m2 (__riscv_vle32_v_f32m2 (&x[i], vl), mean, vl);
475+ __riscv_vse32_v_f32m2 (&y[i], val, vl);
476+ val = __riscv_vfmul_vv_f32m2 (val, val, vl);
477+ vsum = __riscv_vfwredusum_vs_f32m2_f64m1 (val, vsum, vl);
478+ }
479+ sum = (ggml_float)__riscv_vfmv_f_s_f64m1_f64 (vsum);
463480#endif
464481 for (; i < n; ++i) {
465482 float val = x[i] - mean;
0 commit comments