@@ -4997,12 +4997,7 @@ static void ggml_cl_timestep_embedding(ggml_backend_t backend, const ggml_tensor
49974997 backend_ctx->enqueue_ndrange_kernel (kernel, 3 , global_work_size, NULL , dst);
49984998}
49994999
5000- <<<<<<< HEAD
50015000static void ggml_cl_mul_mat_f16_f32_tiled (ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
5002- =======
5003- static void ggml_cl_conv_2d (ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
5004- GGML_TENSOR_BINARY_OP_LOCALS;
5005- >>>>>>> 4d5d5a83 (add conv2d kernel)
50065001 ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context ;
50075002
50085003 ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra ;
@@ -5013,7 +5008,6 @@ static void ggml_cl_conv_2d(ggml_backend_t backend, const ggml_tensor * src0, co
50135008 cl_ulong offset1 = extra1->offset + src1->view_offs ;
50145009 cl_ulong offsetd = extrad->offset + dst->view_offs ;
50155010
5016- <<<<<<< HEAD
50175011 const int M = src0->ne [1 ];
50185012 const int N = src1->ne [1 ];
50195013 const int K = src0->ne [0 ];
@@ -5053,7 +5047,20 @@ static void ggml_cl_conv_2d(ggml_backend_t backend, const ggml_tensor * src0, co
50535047 };
50545048
50555049 backend_ctx->enqueue_ndrange_kernel (kernel, 2 , global_work_size, local_work_size, dst);
5056- =======
5050+ }
5051+
5052+ static void ggml_cl_conv_2d (ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
5053+ GGML_TENSOR_BINARY_OP_LOCALS;
5054+ ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context ;
5055+
5056+ ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra ;
5057+ ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra ;
5058+ ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra ;
5059+
5060+ cl_ulong offset0 = extra0->offset + src0->view_offs ;
5061+ cl_ulong offset1 = extra1->offset + src1->view_offs ;
5062+ cl_ulong offsetd = extrad->offset + dst->view_offs ;
5063+
50575064 const cl_uint Cout = ne03; const cl_uint Cin = ne02; const cl_uint N = ne13;
50585065 const cl_uint KW = ne00; const cl_uint KH = ne01; const cl_uint W = ne10; const cl_uint H = ne11; const cl_uint OW = ne0; const cl_uint OH = ne1;
50595066
@@ -5126,7 +5133,6 @@ static void ggml_cl_conv_2d(ggml_backend_t backend, const ggml_tensor * src0, co
51265133 GGML_UNUSED (dst);
51275134 CL_CHECK (clEnqueueNDRangeKernel (backend_ctx->queue , kernel, 2 , NULL , global_work_size, local_work_size, 0 , NULL , NULL ));
51285135#endif
5129- >>>>>>> 4d5d5a83 (add conv2d kernel)
51305136}
51315137
51325138static void ggml_cl_mul_mat (ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
@@ -6971,4 +6977,3 @@ bool ggml_cl_compute_forward(ggml_backend_t backend, struct ggml_tensor * tensor
69716977 func (backend, tensor->src [0 ], tensor->src [1 ], tensor);
69726978 return true ;
69736979}
6974-
0 commit comments