@@ -7650,28 +7650,26 @@ static void ggml_compute_forward_ssm_scan_f32(
76507650
76517651 // d_inner
76527652 for (int i1 = 0 ; i1 < ir; ++i1) {
7653-
76547653 float dt_soft_plus = dt[i1] <= 20 .0f ? log1pf (expf (dt[i1])) : dt[i1];
76557654 float x_dt = x[i1] * dt_soft_plus;
76567655 svfloat32_t vx_dt = GGML_F32_VEC_SET1 (x_dt);
76577656 svfloat32_t vdt_soft_plus = GGML_F32_VEC_SET1 (dt_soft_plus);
76587657 svfloat32_t r1_vector = GGML_F32_VEC_ZERO;
76597658
7660- for (int64_t k=0 ; k < nc; k += svcntw ()) {
7661-
7662- svfloat32_t vA = GGML_F32_VEC_LOAD (&A[i1*nc+k]);
7659+ for (int64_t k = 0 ; k < nc; k += svcntw ()) {
7660+ svfloat32_t vA = GGML_F32_VEC_LOAD (&A[i1*nc + k]);
76637661 svfloat32_t vB = GGML_F32_VEC_LOAD (&B[k]);
76647662 svfloat32_t vC = GGML_F32_VEC_LOAD (&C[k]);
7665- svfloat32_t vs0 = GGML_F32_VEC_LOAD (&s0[i1*nc+ k]);
7663+ svfloat32_t vs0 = GGML_F32_VEC_LOAD (&s0[i1*nc + k]);
76667664
7667- svfloat32_t t1 = GGML_F32_VEC_MUL (vdt_soft_plus,vA);
7665+ svfloat32_t t1 = GGML_F32_VEC_MUL (vdt_soft_plus, vA);
76687666 t1 = exp_ps_sve (svptrue_b32 (), t1);
7669- svfloat32_t t2 = GGML_F32_VEC_MUL (vx_dt,vB);
7667+ svfloat32_t t2 = GGML_F32_VEC_MUL (vx_dt, vB);
76707668
76717669 vs0 = GGML_F32_VEC_FMA (vs0, t1, t2);
76727670 r1_vector = GGML_F32_VEC_ADD (GGML_F32_VEC_MUL (vs0, vC), r1_vector);
76737671
7674- GGML_F32_VEC_STORE (&s[i1*nc+ k], vs0);
7672+ GGML_F32_VEC_STORE (&s[i1*nc + k], vs0);
76757673 }
76767674 y[i1] = GGML_F32xt_REDUCE_ONE (r1_vector);
76777675 }
0 commit comments