Skip to content

Commit 50c678f

Browse files
committed
rm __ARM_FEATURE_SVE
1 parent 563aca0 commit 50c678f

File tree

1 file changed

+15
-45
lines changed

1 file changed

+15
-45
lines changed

ggml/src/ggml-cpu/vec.h

Lines changed: 15 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -355,57 +355,27 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons
355355
#if defined(GGML_USE_ACCELERATE)
356356
vDSP_vsmsa(y, 1, &s, &b, y, 1, n);
357357
#elif defined(GGML_SIMD)
358-
#if defined(__ARM_FEATURE_SVE)
359-
const int sve_register_length = ggml_cpu_get_sve_cnt() * 8;
360-
const int ggml_f32_epr = sve_register_length / 32;//8;//svcntw(); // SVE128:4, SVE256:8, SVE512:16
361-
const int ggml_f32_step = 2 * ggml_f32_epr;
362-
363-
GGML_F32_VEC vs = GGML_F32_VEC_SET1(s);
364-
GGML_F32_VEC vb = GGML_F32_VEC_SET1(b);
365-
366-
const int np = (n & ~(ggml_f32_step - 1));
367-
svfloat32_t ay1;
368-
svfloat32_t ay2;
369-
for (int i = 0; i < np; i += ggml_f32_step) {
370-
ay1 = GGML_F32_VEC_LOAD(y + i);
371-
ay1 = GGML_F32_VEC_FMA(ay1, vs, vb);
372-
GGML_F32_VEC_STORE(y + i, ay1);
373-
374-
ay2 = GGML_F32_VEC_LOAD(y + i + 1*ggml_f32_epr);
375-
ay2 = GGML_F32_VEC_FMA(ay2, vs, vb);
376-
GGML_F32_VEC_STORE(y + i + 1*ggml_f32_epr, ay2);
377-
}
378-
// leftovers
379-
// maximum number of leftover elements will be less than ggml_f32_epr. Apply predicated svmad on available elements only
380-
if (np < n) {
381-
svbool_t pg = svwhilelt_b32(np, n);
382-
ay1 = svld1_f32(pg, y + np);
383-
ay1 = svmul_f32_m(pg, ay1, vs);
384-
ay1 = svadd_f32_m(pg, ay1, vb);
385-
svst1_f32(pg, y + np, ay1);
386-
}
387-
#else
388-
const int np = (n & ~(GGML_F32_STEP - 1));
358+
// TODO: #if defined(__ARM_FEATURE_SVE)
359+
const int np = (n & ~(GGML_F32_STEP - 1));
389360

390-
GGML_F32_VEC vs = GGML_F32_VEC_SET1(s);
391-
GGML_F32_VEC vb = GGML_F32_VEC_SET1(b);
361+
GGML_F32_VEC vs = GGML_F32_VEC_SET1(s);
362+
GGML_F32_VEC vb = GGML_F32_VEC_SET1(b);
392363

393-
GGML_F32_VEC ay[GGML_F32_ARR];
364+
GGML_F32_VEC ay[GGML_F32_ARR];
394365

395-
for (int i = 0; i < np; i += GGML_F32_STEP) {
396-
for (int j = 0; j < GGML_F32_ARR; j++) {
397-
ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
398-
ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
366+
for (int i = 0; i < np; i += GGML_F32_STEP) {
367+
for (int j = 0; j < GGML_F32_ARR; j++) {
368+
ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
369+
ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
399370

400-
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
401-
}
371+
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
402372
}
373+
}
403374

404-
// leftovers
405-
for (int i = np; i < n; ++i) {
406-
y[i] = y[i]*s + b;
407-
}
408-
#endif
375+
// leftovers
376+
for (int i = np; i < n; ++i) {
377+
y[i] = y[i]*s + b;
378+
}
409379
#else
410380
// scalar
411381
for (int i = 0; i < n; ++i) {

0 commit comments

Comments
 (0)