Skip to content

Commit 0d16d2d

Browse files
committed
Revert "ggml: SVE support for exponential functions (ggml-org#15145)"
This reverts commit 078ce23.
1 parent fae4467 commit 0d16d2d

File tree

2 files changed

+1
-54
lines changed

2 files changed

+1
-54
lines changed

ggml/src/ggml-cpu/vec.cpp

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -321,12 +321,6 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) {
321321
for (; i + 3 < n; i += 4) {
322322
_mm_storeu_ps(y + i, ggml_v_silu(_mm_loadu_ps(x + i)));
323323
}
324-
#elif defined(__ARM_FEATURE_SVE) && defined(__aarch64__)
325-
const int vlen = svcntw();
326-
for (; i < n; i += vlen) {
327-
const svbool_t pg = svwhilelt_b32_s32(i, n);
328-
svst1_f32(pg, y + i, ggml_v_silu(pg, svld1_f32(pg, x + i)));
329-
}
330324
#elif defined(__ARM_NEON) && defined(__aarch64__)
331325
for (; i + 3 < n; i += 4) {
332326
vst1q_f32(y + i, ggml_v_silu(vld1q_f32(x + i)));
@@ -351,12 +345,6 @@ void ggml_vec_swiglu_f32(const int n, float * y, const float * x, const float *
351345
for (; i + 3 < n; i += 4) {
352346
_mm_storeu_ps(y + i, _mm_mul_ps(ggml_v_silu(_mm_loadu_ps(x + i)), _mm_loadu_ps(g + i)));
353347
}
354-
#elif defined(__ARM_FEATURE_SVE) && defined(__aarch64__)
355-
const int vlen = svcntw();
356-
for (; i < n; i += vlen) {
357-
const svbool_t pg = svwhilelt_b32_s32(i, n);
358-
svst1_f32(pg, y + i, svmul_f32_x(pg, ggml_v_silu(pg, svld1_f32(pg, x + i)), svld1_f32(pg, g + i)));
359-
}
360348
#elif defined(__ARM_NEON) && defined(__aarch64__)
361349
for (; i + 3 < n; i += 4) {
362350
vst1q_f32(y + i, vmulq_f32(ggml_v_silu(vld1q_f32(x + i)), vld1q_f32(g + i)));
@@ -404,15 +392,6 @@ ggml_float ggml_vec_soft_max_f32(const int n, float * y, const float * x, float
404392
#endif
405393
sum += (ggml_float)_mm_cvtss_f32(val);
406394
}
407-
#elif defined(__ARM_FEATURE_SVE) && defined(__aarch64__)
408-
const int vlen = svcntw();
409-
for (; i < n; i += vlen) {
410-
const svbool_t pg = svwhilelt_b32_s32(i, n);
411-
svfloat32_t val = ggml_v_expf(pg, svsub_f32_x(pg, svld1_f32(pg, x + i),
412-
svdup_n_f32_x(pg, max)));
413-
svst1_f32(pg, y + i, val);
414-
sum += (ggml_float)svaddv_f32(pg, val);
415-
}
416395
#elif defined(__ARM_NEON) && defined(__aarch64__)
417396
for (; i + 3 < n; i += 4) {
418397
float32x4_t val = ggml_v_expf(vsubq_f32(vld1q_f32(x + i),

ggml/src/ggml-cpu/vec.h

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,39 +1002,7 @@ inline static ggml_fp16_t ggml_silu_f16(ggml_fp16_t x) {
10021002
}
10031003
#endif
10041004

1005-
#if defined(__ARM_FEATURE_SVE) && defined(__aarch64__)
1006-
1007-
inline static svfloat32_t ggml_v_expf(svbool_t pg, svfloat32_t x) {
1008-
const svfloat32_t r = svdup_n_f32_x(pg, 0x1.8p23f);
1009-
const svfloat32_t z = svmla_n_f32_x(pg, r, x, 0x1.715476p+0f);
1010-
const svfloat32_t n = svsub_f32_x(pg, z, r);
1011-
const svfloat32_t b = svmls_n_f32_x(pg, svmls_n_f32_x(pg, x, n, 0x1.62e4p-1f), n, 0x1.7f7d1cp-20f);
1012-
const svuint32_t e = svlsl_n_u32_x(pg, svreinterpret_u32_f32(z), 23);
1013-
const svfloat32_t k = svreinterpret_f32_u32(svadd_u32_x(pg, e, svreinterpret_u32_f32(svdup_n_f32_x(pg, 1))));
1014-
const svbool_t c = svacgt_n_f32(pg, n, 126);
1015-
const svfloat32_t u = svmul_f32_x(pg, b, b);
1016-
const svfloat32_t j = svmla_f32_x(pg,
1017-
svmul_n_f32_x(pg, b, 0x1.ffffecp-1f),
1018-
svmla_f32_x(pg, svmla_f32_x(pg, svdup_n_f32_x(pg, 0x1.fffdb6p-2f), svdup_n_f32_x(pg, 0x1.555e66p-3f), b),
1019-
svmla_f32_x(pg, svdup_n_f32_x(pg, 0x1.573e2ep-5f), svdup_n_f32_x(pg, 0x1.0e4020p-7f), b), u), u);
1020-
const svuint32_t d = svdup_n_u32_z(svcmple_n_f32(pg, n, 0.0), 0x82000000);
1021-
const svfloat32_t s1 = svreinterpret_f32_u32(svadd_n_u32_x(pg, d, 0x7f000000));
1022-
const svfloat32_t s2 = svreinterpret_f32_u32(svsub_u32_x(pg, e, d));
1023-
return svsel_f32(svacgt_f32(pg, n, svdup_n_f32_x(pg, 192)), svmul_f32_x(pg, s1, s1),
1024-
svsel_f32(c, svmul_f32_x(pg, svmla_f32_x(pg, s2, s2, j), s1), svmla_f32_x(pg, k, k, j)));
1025-
}
1026-
1027-
// computes silu x/(1+exp(-x)) in single precision vector
1028-
inline static svfloat32_t ggml_v_silu(svbool_t pg, svfloat32_t x) {
1029-
const svfloat32_t one = svdup_n_f32_x(pg, 1.0f);
1030-
const svfloat32_t zero = svdup_n_f32_x(pg, 0.0f);
1031-
const svfloat32_t neg_x = svsub_f32_x(pg, zero, x);
1032-
const svfloat32_t exp_neg_x = ggml_v_expf(pg, neg_x);
1033-
const svfloat32_t one_plus_exp_neg_x = svadd_f32_x(pg, one, exp_neg_x);
1034-
return svdiv_f32_x(pg, x, one_plus_exp_neg_x);
1035-
}
1036-
1037-
#elif defined(__ARM_NEON) && defined(__aarch64__)
1005+
#if defined(__ARM_NEON) && defined(__aarch64__)
10381006

10391007
// adapted from arm limited optimized routine
10401008
// the maximum error is 1.45358 plus 0.5 ulps

0 commit comments

Comments
 (0)