@@ -57,28 +57,28 @@ static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * ds
5757
5858 GGML_ASSERT (ggml_can_repeat (src1, src0) && ggml_are_same_shape (src0, dst));
5959
60- #if defined(GGML_USE_CLBLAST)
61- // do we even need this? it seems like its actually slower than just CPU
62- const int ith = params->ith ;
63- if (op == op_add && src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && src1->clblast_offload_gpu ) {
64- // TODO: OpenCL kernel support full broadcast
65- static_assert (GGML_MAX_DIMS == 4 , " GGML_MAX_DIMS is not 4 - update this function" );
66- GGML_ASSERT ((src1->ne [0 ] == src0->ne [0 ]) && ggml_can_repeat (src1, src0));
67- if (ith == 0 ) {
68- ggml_cl_add (src0, src1, dst);
69- }
70- return ;
71- }
72- if (op == op_mul && src0->type == GGML_TYPE_F32 && src1->clblast_offload_gpu ) {
73- // TODO: OpenCL kernel support full broadcast
74- static_assert (GGML_MAX_DIMS == 4 , " GGML_MAX_DIMS is not 4 - update this function" );
75- GGML_ASSERT ((src1->ne [0 ] == src0->ne [0 ]) && ggml_can_repeat (src1, src0));
76- if (ith == 0 ) {
77- ggml_cl_mul (src0, src1, dst);
78- }
79- return ;
80- }
81- #endif
60+ // #if defined(GGML_USE_CLBLAST)
61+ // // do we even need this? it seems like its actually slower than just CPU
62+ // const int ith = params->ith;
63+ // if (op == op_add && src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) {
64+ // // TODO: OpenCL kernel support full broadcast
65+ // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
66+ // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0));
67+ // if (ith == 0) {
68+ // ggml_cl_add(src0, src1, dst);
69+ // }
70+ // return;
71+ // }
72+ // if (op == op_mul && src0->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) {
73+ // // TODO: OpenCL kernel support full broadcast
74+ // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
75+ // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0));
76+ // if (ith == 0) {
77+ // ggml_cl_mul(src0, src1, dst);
78+ // }
79+ // return;
80+ // }
81+ // #endif
8282
8383 GGML_TENSOR_BINARY_OP_LOCALS
8484
0 commit comments