Skip to content

Commit a5ccf16

Browse files
committed
ggml_vec_mad1_f32
1 parent 7af3fd9 commit a5ccf16

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

ggml/src/ggml-cpu/ops.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4671,10 +4671,7 @@ static void ggml_compute_forward_scale_f32(
46714671
// src0 is same shape as dst => same indices
46724672
memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
46734673
}
4674-
ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s);
4675-
if (b != 0.0f) {
4676-
ggml_vec_acc1_f32(nc, (float *) ((char *) dst->data + i1*nb1), b);
4677-
}
4674+
ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b);
46784675
}
46794676
}
46804677

ggml/src/ggml-cpu/vec.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,36 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int
351351
#endif
352352
}
353353

354+
// In-place fused scale-and-bias: y[i] = y[i]*s + b for every i in [0, n).
//
//   n - number of float elements in y
//   y - buffer that is read and overwritten in place
//   s - multiplicative scale
//   b - additive bias
inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, const float b) {
#if defined(GGML_SIMD)
    // number of elements handled by the vector loop: n rounded down to a
    // multiple of GGML_F32_STEP (the mask assumes GGML_F32_STEP is a power of two)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vs = GGML_F32_VEC_SET1(s);
    GGML_F32_VEC vb = GGML_F32_VEC_SET1(b);

    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            // NOTE(review): GGML_F32_VEC_FMA(a, b, c) is expected to evaluate
            // a + b*c (accumulator first) - confirm against the macro definition.
            // With that convention the bias must be the first operand so that
            // the result is b + y*s rather than y + s*b.
            ay[j] = GGML_F32_VEC_FMA(vb, ay[j], vs);

            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
        }
    }

    // leftovers
    for (int i = np; i < n; ++i) {
        y[i] = y[i]*s + b;
    }
#else
    // scalar
    for (int i = 0; i < n; ++i) {
        // plain assignment: a compound `*=` here would compute y*(y*s + b)
        y[i] = y[i]*s + b;
    }
#endif
}
383+
354384
//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
355385
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
356386
#if defined(GGML_USE_ACCELERATE)

0 commit comments

Comments
 (0)