@@ -2031,30 +2031,30 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
2031
2031
const int ne02 = src0 ? src0->ne [2 ] : 0 ;
2032
2032
const int ne03 = src0 ? src0->ne [3 ] : 0 ;
2033
2033
2034
- const int nb00 = src0 ? src0->nb [0 ] : 0 ;
2035
- const int nb01 = src0 ? src0->nb [1 ] : 0 ;
2036
- const int nb02 = src0 ? src0->nb [2 ] : 0 ;
2037
- const int nb03 = src0 ? src0->nb [3 ] : 0 ;
2034
+ const cl_ulong nb00 = src0 ? src0->nb [0 ] : 0 ;
2035
+ const cl_ulong nb01 = src0 ? src0->nb [1 ] : 0 ;
2036
+ const cl_ulong nb02 = src0 ? src0->nb [2 ] : 0 ;
2037
+ const cl_ulong nb03 = src0 ? src0->nb [3 ] : 0 ;
2038
2038
2039
2039
const int ne10 = src1 ? src1->ne [0 ] : 0 ;
2040
2040
const int ne11 = src1 ? src1->ne [1 ] : 0 ;
2041
2041
const int ne12 = src1 ? src1->ne [2 ] : 0 ;
2042
2042
const int ne13 = src1 ? src1->ne [3 ] : 0 ; UNUSED (ne13);
2043
2043
2044
- const int nb10 = src1 ? src1->nb [0 ] : 0 ;
2045
- const int nb11 = src1 ? src1->nb [1 ] : 0 ;
2046
- const int nb12 = src1 ? src1->nb [2 ] : 0 ;
2047
- const int nb13 = src1 ? src1->nb [3 ] : 0 ; UNUSED (nb13);
2044
+ const cl_ulong nb10 = src1 ? src1->nb [0 ] : 0 ;
2045
+ const cl_ulong nb11 = src1 ? src1->nb [1 ] : 0 ;
2046
+ const cl_ulong nb12 = src1 ? src1->nb [2 ] : 0 ;
2047
+ const cl_ulong nb13 = src1 ? src1->nb [3 ] : 0 ; UNUSED (nb13);
2048
2048
2049
2049
const int ne0 = dst ? dst->ne [0 ] : 0 ;
2050
2050
const int ne1 = dst ? dst->ne [1 ] : 0 ;
2051
2051
const int ne2 = dst ? dst->ne [2 ] : 0 ;
2052
2052
const int ne3 = dst ? dst->ne [3 ] : 0 ;
2053
2053
2054
- const int nb0 = dst ? dst->nb [0 ] : 0 ;
2055
- const int nb1 = dst ? dst->nb [1 ] : 0 ;
2056
- const int nb2 = dst ? dst->nb [2 ] : 0 ;
2057
- const int nb3 = dst ? dst->nb [3 ] : 0 ;
2054
+ const cl_ulong nb0 = dst ? dst->nb [0 ] : 0 ;
2055
+ const cl_ulong nb1 = dst ? dst->nb [1 ] : 0 ;
2056
+ const cl_ulong nb2 = dst ? dst->nb [2 ] : 0 ;
2057
+ const cl_ulong nb3 = dst ? dst->nb [3 ] : 0 ;
2058
2058
2059
2059
ggml_backend_opencl2_context *backend_ctx = (ggml_backend_opencl2_context *)backend->context ;
2060
2060
cl_command_queue queue = backend_ctx->queue ;
@@ -2063,9 +2063,9 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
2063
2063
ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra ;
2064
2064
ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra ;
2065
2065
2066
- int offset0 = extra0->offset + src0->view_offs ;
2067
- int offset1 = extra1->offset + src1->view_offs ;
2068
- int offsetd = extrad->offset + dst->view_offs ;
2066
+ cl_ulong offset0 = extra0->offset + src0->view_offs ;
2067
+ cl_ulong offset1 = extra1->offset + src1->view_offs ;
2068
+ cl_ulong offsetd = extrad->offset + dst->view_offs ;
2069
2069
2070
2070
bool bcast_row = false ;
2071
2071
int nb = ne00;
@@ -2081,46 +2081,46 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
2081
2081
nb = ne00 / 4 ;
2082
2082
kernel = backend_ctx->kernel_add_row ;
2083
2083
2084
- CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2085
- CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (int ), &offset0));
2086
- CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2087
- CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (int ), &offset1));
2088
- CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2089
- CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (int ), &offsetd));
2090
- CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &nb));
2084
+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2085
+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
2086
+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2087
+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
2088
+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2089
+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offsetd));
2090
+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &nb));
2091
2091
} else {
2092
2092
kernel = backend_ctx->kernel_add ;
2093
2093
2094
- CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2095
- CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (int ), &offset0));
2096
- CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2097
- CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (int ), &offset1));
2098
- CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2099
- CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (int ), &offsetd));
2100
- CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &ne00));
2101
- CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (int ), &ne01));
2102
- CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne02));
2103
- CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne03));
2104
- CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (int ), &nb00));
2105
- CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (int ), &nb01));
2106
- CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (int ), &nb02));
2107
- CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (int ), &nb03));
2108
- CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (int ), &ne10));
2109
- CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (int ), &ne11));
2110
- CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne12));
2111
- CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne13));
2112
- CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (int ), &nb10));
2113
- CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (int ), &nb11));
2114
- CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (int ), &nb12));
2115
- CL_CHECK (clSetKernelArg (kernel, 21 , sizeof (int ), &nb13));
2116
- CL_CHECK (clSetKernelArg (kernel, 22 , sizeof (int ), &ne0));
2117
- CL_CHECK (clSetKernelArg (kernel, 23 , sizeof (int ), &ne1));
2118
- CL_CHECK (clSetKernelArg (kernel, 24 , sizeof (int ), &ne2));
2119
- CL_CHECK (clSetKernelArg (kernel, 25 , sizeof (int ), &ne3));
2120
- CL_CHECK (clSetKernelArg (kernel, 26 , sizeof (int ), &nb0));
2121
- CL_CHECK (clSetKernelArg (kernel, 27 , sizeof (int ), &nb1));
2122
- CL_CHECK (clSetKernelArg (kernel, 28 , sizeof (int ), &nb2));
2123
- CL_CHECK (clSetKernelArg (kernel, 29 , sizeof (int ), &nb3));
2094
+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2095
+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
2096
+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2097
+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
2098
+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2099
+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offsetd));
2100
+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &ne00));
2101
+ CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (int ), &ne01));
2102
+ CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne02));
2103
+ CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne03));
2104
+ CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (cl_ulong), &nb00));
2105
+ CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb01));
2106
+ CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (cl_ulong), &nb02));
2107
+ CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (cl_ulong), &nb03));
2108
+ CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (int ), &ne10));
2109
+ CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (int ), &ne11));
2110
+ CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne12));
2111
+ CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne13));
2112
+ CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (cl_ulong), &nb10));
2113
+ CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (cl_ulong), &nb11));
2114
+ CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (cl_ulong), &nb12));
2115
+ CL_CHECK (clSetKernelArg (kernel, 21 , sizeof (cl_ulong), &nb13));
2116
+ CL_CHECK (clSetKernelArg (kernel, 22 , sizeof (int ), &ne0));
2117
+ CL_CHECK (clSetKernelArg (kernel, 23 , sizeof (int ), &ne1));
2118
+ CL_CHECK (clSetKernelArg (kernel, 24 , sizeof (int ), &ne2));
2119
+ CL_CHECK (clSetKernelArg (kernel, 25 , sizeof (int ), &ne3));
2120
+ CL_CHECK (clSetKernelArg (kernel, 26 , sizeof (cl_ulong), &nb0));
2121
+ CL_CHECK (clSetKernelArg (kernel, 27 , sizeof (cl_ulong), &nb1));
2122
+ CL_CHECK (clSetKernelArg (kernel, 28 , sizeof (cl_ulong), &nb2));
2123
+ CL_CHECK (clSetKernelArg (kernel, 29 , sizeof (cl_ulong), &nb3));
2124
2124
}
2125
2125
2126
2126
if (bcast_row) {
0 commit comments