|
6 | 6 | using vDSP_fn_t = void (*)(const float *, vDSP_Stride, const float *, vDSP_Stride, float *, vDSP_Stride, vDSP_Length); |
7 | 7 | #endif |
8 | 8 |
|
9 | | -#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions |
10 | | -#include "ggml_v3b-opencl.h" |
11 | | -#endif |
12 | 9 |
|
13 | 10 | static inline float op_add(float a, float b) { |
14 | 11 | return a + b; |
@@ -57,29 +54,6 @@ static void apply_binary_op(const ggml_compute_params * params, ggml_tensor * ds |
57 | 54 |
|
58 | 55 | GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst)); |
59 | 56 |
|
60 | | - // #if defined(GGML_USE_CLBLAST) |
61 | | - // //do we even need this? it seems like its actually slower than just CPU |
62 | | - // const int ith = params->ith; |
63 | | - // if (op == op_add && src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) { |
64 | | - // // TODO: OpenCL kernel support full broadcast |
65 | | - // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
66 | | - // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0)); |
67 | | - // if (ith == 0) { |
68 | | - // ggml_cl_add(src0, src1, dst); |
69 | | - // } |
70 | | - // return; |
71 | | - // } |
72 | | - // if (op == op_mul && src0->type == GGML_TYPE_F32 && src1->clblast_offload_gpu) { |
73 | | - // // TODO: OpenCL kernel support full broadcast |
74 | | - // static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); |
75 | | - // GGML_ASSERT((src1->ne[0] == src0->ne[0]) && ggml_can_repeat(src1, src0)); |
76 | | - // if (ith == 0) { |
77 | | - // ggml_cl_mul(src0, src1, dst); |
78 | | - // } |
79 | | - // return; |
80 | | - // } |
81 | | - // #endif |
82 | | - |
83 | 57 | GGML_TENSOR_BINARY_OP_LOCALS |
84 | 58 |
|
85 | 59 | GGML_ASSERT( nb0 == sizeof(dst_t)); |
|
0 commit comments