add x param to ggml_vec_mad1_f32

ngxson · ngxson · commit ebbad7796df3 · 2025-07-09T14:11:53.000+02:00
diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp
@@ -4670,17 +4670,17 @@ static void ggml_compute_forward_scale_f32(
         for (int i1 = ir0; i1 < ir1; i1++) {
             if (dst->data != src0->data) {
                 // src0 is same shape as dst => same indices
+                // TODO: add x parameter to ggml_vec_scale_f32 and remove this memcpy
                 memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
             }
             ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s);
         }
     } else {
         for (int i1 = ir0; i1 < ir1; i1++) {
-            if (dst->data != src0->data) {
-                // src0 is same shape as dst => same indices
-                memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));
-            }
-            ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b);
+            ggml_vec_mad1_f32(nc,
+                (float *) ((char *) dst->data  + i1*nb1),
+                (float *) ((char *) src0->data + i1*nb1),
+                s, b);
         }
     }
 }
diff --git a/ggml/src/ggml-cpu/vec.h b/ggml/src/ggml-cpu/vec.h
@@ -351,14 +351,14 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int
 #endif
 }
 
-inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, const float b) {
+inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, const float s, const float b) {
 #if defined(GGML_USE_ACCELERATE)
-    vDSP_vsmsa(y, 1, &s, &b, y, 1, n);
+    vDSP_vsmsa(x, 1, &s, &b, y, 1, n);
 #elif defined(GGML_SIMD)
     #if defined(__ARM_FEATURE_SVE)
         // scalar ; TODO: Write SVE code
         for (int i = 0; i < n; ++i) {
-            y[i] = y[i]*s + b;
+            y[i] = x[i]*s + b;
         }
     #else
         const int np = (n & ~(GGML_F32_STEP - 1));
@@ -370,7 +370,7 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons
 
         for (int i = 0; i < np; i += GGML_F32_STEP) {
             for (int j = 0; j < GGML_F32_ARR; j++) {
-                ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
+                ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
                 ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb);
 
                 GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
@@ -379,13 +379,13 @@ inline static void ggml_vec_mad1_f32(const int n, float * y, const float s, cons
 
         // leftovers
         for (int i = np; i < n; ++i) {
-            y[i] = y[i]*s + b;
+            y[i] = x[i]*s + b;
         }
     #endif
 #else
     // scalar
     for (int i = 0; i < n; ++i) {
-        y[i] = y[i]*s + b;
+        y[i] = x[i]*s + b;
     }
 #endif
 }

Original file line number	Diff line number	Diff line change
`@@ -4670,17 +4670,17 @@ static void ggml_compute_forward_scale_f32(`
`4670`	`4670`	`for (int i1 = ir0; i1 < ir1; i1++) {`
`4671`	`4671`	`if (dst->data != src0->data) {`
`4672`	`4672`	`// src0 is same shape as dst => same indices`
	`4673`	`+ // TODO: add x parameter to ggml_vec_scale_f32 and remove this memcpy`
`4673`	`4674`	`memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));`
`4674`	`4675`	`}`
`4675`	`4676`	`ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s);`
`4676`	`4677`	`}`
`4677`	`4678`	`} else {`
`4678`	`4679`	`for (int i1 = ir0; i1 < ir1; i1++) {`
`4679`		`- if (dst->data != src0->data) {`
`4680`		`- // src0 is same shape as dst => same indices`
`4681`		`- memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float));`
`4682`		`- }`
`4683`		`- ggml_vec_mad1_f32(nc, (float *) ((char *) dst->data + i1*nb1), s, b);`
	`4680`	`+ ggml_vec_mad1_f32(nc,`
	`4681`	`+ (float *) ((char *) dst->data + i1*nb1),`
	`4682`	`+ (float *) ((char *) src0->data + i1*nb1),`
	`4683`	`+ s, b);`
`4684`	`4684`	`}`
`4685`	`4685`	`}`
`4686`	`4686`	`}`