11// SPDX-FileCopyrightText: Copyright (c) Qualcomm Innovation Center, Inc. All rights reserved
22// SPDX-License-Identifier: MIT
33
4+ #define CL_TARGET_OPENCL_VERSION 220
5+
6+ // suppress warnings in CL headers for GCC and Clang
7+ #pragma GCC diagnostic ignored "-Wgnu-anonymous-struct"
8+ #pragma GCC diagnostic ignored "-Woverlength-strings"
9+
410#include "ggml-opencl2.h"
511#include "ggml-backend.h"
612#include "ggml-impl.h"
@@ -1237,10 +1243,6 @@ static void ggml_backend_opencl2_buffer_init_tensor(ggml_backend_buffer_t buffer
12371243 tensor->extra = extra;
12381244 }
12391245 }
1240-
1241- // This should be removed. Keep it to make it easier to identify the backend
1242- // when debugging until backend is removed from tensor.
1243- tensor->backend = GGML_BACKEND_TYPE_GPU;
12441246}
12451247
12461248// The optimized gemm and gemv kernels are used for large matrices without batch.
@@ -1938,18 +1940,7 @@ static struct ggml_backend_device_i ggml_backend_opencl2_device_i = {
19381940 /* .event_synchronize = */ NULL ,
19391941};
19401942
1941- //
1942- // Backend registration
1943- //
1944-
1945- GGML_API ggml_backend_t ggml_backend_reg_opencl2_init (const char * params, void * user_data) {
1946- return ggml_backend_opencl2_init ();
1947-
1948- GGML_UNUSED (params);
1949- GGML_UNUSED (user_data);
1950- }
1951-
1952- // new API
1943+ // Backend registry
19531944
19541945static const char * ggml_backend_opencl2_reg_get_name (ggml_backend_reg_t reg) {
19551946 return " OpenCL2" ;
@@ -1986,6 +1977,7 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) {
19861977
19871978 if (!initialized) {
19881979 reg = ggml_backend_reg {
1980+ /* .api_version = */ GGML_BACKEND_API_VERSION,
19891981 /* .iface = */ ggml_backend_opencl2_reg_i,
19901982 /* .context = */ NULL ,
19911983 };
@@ -2004,6 +1996,8 @@ ggml_backend_reg_t ggml_backend_opencl2_reg(void) {
20041996 return &reg;
20051997}
20061998
1999+ GGML_BACKEND_DL_IMPL (ggml_backend_opencl2_reg)
2000+
20072001// ------------------------------------------------------------------------------
20082002// Debugging utils
20092003// ------------------------------------------------------------------------------
@@ -2921,13 +2915,11 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
29212915 // init CL objects
29222916 // <--------------------------------------------> //
29232917 cl_int status;
2924- cl_event evt;
29252918 cl_image_format img_fmt_1d;
29262919 cl_image_desc img_desc_1d;
29272920 cl_buffer_region region;
29282921 cl_mem A_image1d;
29292922 cl_mem B_image1d;
2930- cl_mem A_sub_buffer;
29312923 cl_mem B_sub_buffer;
29322924 cl_mem C_d;
29332925 // for B transpose
@@ -3623,7 +3615,7 @@ static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const
36233615 GGML_ASSERT (src1->extra );
36243616
36253617 // GGML_OP_CPY happens between src0 and src1.
3626- // GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst.
3618+ // GGML_OP_DUP and GGML_OP_CONT happen between src0 and dst.
36273619 UNUSED (dst);
36283620
36293621 const int ne00 = src0 ? src0->ne [0 ] : 0 ;
0 commit comments