Skip to content

Commit 805512a

Browse files
committed
ggml : remove unused fast broadcast path in GGML_MUL
The fast broadcast path was initially added because states were masked with ggml_mul. That is no longer done, so this "optimisation" is no longer necessary, or at least not worth the additional code complexity.
1 parent 038d958 commit 805512a

File tree

1 file changed

+1
-31
lines changed

1 file changed

+1
-31
lines changed

ggml/src/ggml.c

Lines changed: 1 addition & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -10173,37 +10173,7 @@ static void ggml_compute_forward_mul_f32(
10173 10173
GGML_ASSERT( nb0 == sizeof(float));
10174 10174
GGML_ASSERT(nb00 == sizeof(float));
10175 10175

10176-
if (ne00 > 1 && ne10 == 1) {
10177-
// fast broadcast path
10178-
for (int64_t ir = ith; ir < nr; ir += nth) {
10179-
// src0 and dst are same shape => same indices
10180-
const int64_t i03 = ir/(ne02*ne01);
10181-
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
10182-
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
10183-
10184-
const int64_t i13 = i03 % ne13;
10185-
const int64_t i12 = i02 % ne12;
10186-
const int64_t i11 = i01 % ne11;
10187-
10188-
float * src1_ptr = (float *) ((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11);
10189-
10190-
const float scale = src1_ptr[0];
10191-
10192-
if (scale == 0.0f) {
10193-
// NOTE: this also sets NANs to zero, which is not compliant with IEEE754,
10194-
// but it is useful when resetting the state of recurrent models.
10195-
memset((char *) dst->data + ir*nb1, 0, ne0 * sizeof(float));
10196-
} else {
10197-
if (dst->data != src0->data) {
10198-
// src0 is same shape as dst => same indices
10199-
memcpy((char *) dst->data + ir*nb1, (char *) src0->data + ir*nb01, ne0 * sizeof(float));
10200-
}
10201-
if (scale != 1.0f) {
10202-
ggml_vec_scale_f32(ne0, (float *) ((char *) dst->data + ir*nb1), scale);
10203-
}
10204-
}
10205-
}
10206-
} else if (nb10 == sizeof(float)) {
10176+
if (nb10 == sizeof(float)) {
10207 10177
for (int64_t ir = ith; ir < nr; ir += nth) {
10208 10178
// src0 and dst are same shape => same indices
10209 10179
const int64_t i03 = ir/(ne02*ne01);

0 commit comments

Comments
 (0)