Skip to content

Commit 621aa7c

Browse files
committed
Fixed CLBlast, but this part might not actually help speed-wise.
1 parent e1d3c19 commit 621aa7c

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

ggml/src/ggml-cpu/binary-ops.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * ds
5858
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
5959

6060
#if defined(GGML_USE_CLBLAST)
61+
// Do we even need this? It seems like it's actually slower than just using the CPU.
6162
const int ith = params->ith;
6263
if (op == op_add && src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) {
6364
// TODO: OpenCL kernel support full broadcast
@@ -73,7 +74,7 @@ static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * ds
7374
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
7475
GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0));
7576
if (ith == 0) {
76-
ggml_cl_add(src0, src1, dst);
77+
ggml_cl_mul(src0, src1, dst);
7778
}
7879
return;
7980
}

0 commit comments

Comments
 (0)