@@ -2031,30 +2031,30 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
20312031 const int ne02 = src0 ? src0->ne [2 ] : 0 ;
20322032 const int ne03 = src0 ? src0->ne [3 ] : 0 ;
20332033
2034- const int nb00 = src0 ? src0->nb [0 ] : 0 ;
2035- const int nb01 = src0 ? src0->nb [1 ] : 0 ;
2036- const int nb02 = src0 ? src0->nb [2 ] : 0 ;
2037- const int nb03 = src0 ? src0->nb [3 ] : 0 ;
2034+ const cl_ulong nb00 = src0 ? src0->nb [0 ] : 0 ;
2035+ const cl_ulong nb01 = src0 ? src0->nb [1 ] : 0 ;
2036+ const cl_ulong nb02 = src0 ? src0->nb [2 ] : 0 ;
2037+ const cl_ulong nb03 = src0 ? src0->nb [3 ] : 0 ;
20382038
20392039 const int ne10 = src1 ? src1->ne [0 ] : 0 ;
20402040 const int ne11 = src1 ? src1->ne [1 ] : 0 ;
20412041 const int ne12 = src1 ? src1->ne [2 ] : 0 ;
20422042 const int ne13 = src1 ? src1->ne [3 ] : 0 ; UNUSED (ne13);
20432043
2044- const int nb10 = src1 ? src1->nb [0 ] : 0 ;
2045- const int nb11 = src1 ? src1->nb [1 ] : 0 ;
2046- const int nb12 = src1 ? src1->nb [2 ] : 0 ;
2047- const int nb13 = src1 ? src1->nb [3 ] : 0 ; UNUSED (nb13);
2044+ const cl_ulong nb10 = src1 ? src1->nb [0 ] : 0 ;
2045+ const cl_ulong nb11 = src1 ? src1->nb [1 ] : 0 ;
2046+ const cl_ulong nb12 = src1 ? src1->nb [2 ] : 0 ;
2047+ const cl_ulong nb13 = src1 ? src1->nb [3 ] : 0 ; UNUSED (nb13);
20482048
20492049 const int ne0 = dst ? dst->ne [0 ] : 0 ;
20502050 const int ne1 = dst ? dst->ne [1 ] : 0 ;
20512051 const int ne2 = dst ? dst->ne [2 ] : 0 ;
20522052 const int ne3 = dst ? dst->ne [3 ] : 0 ;
20532053
2054- const int nb0 = dst ? dst->nb [0 ] : 0 ;
2055- const int nb1 = dst ? dst->nb [1 ] : 0 ;
2056- const int nb2 = dst ? dst->nb [2 ] : 0 ;
2057- const int nb3 = dst ? dst->nb [3 ] : 0 ;
2054+ const cl_ulong nb0 = dst ? dst->nb [0 ] : 0 ;
2055+ const cl_ulong nb1 = dst ? dst->nb [1 ] : 0 ;
2056+ const cl_ulong nb2 = dst ? dst->nb [2 ] : 0 ;
2057+ const cl_ulong nb3 = dst ? dst->nb [3 ] : 0 ;
20582058
20592059 ggml_backend_opencl2_context *backend_ctx = (ggml_backend_opencl2_context *)backend->context ;
20602060 cl_command_queue queue = backend_ctx->queue ;
@@ -2063,9 +2063,9 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
20632063 ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra ;
20642064 ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra ;
20652065
2066- int offset0 = extra0->offset + src0->view_offs ;
2067- int offset1 = extra1->offset + src1->view_offs ;
2068- int offsetd = extrad->offset + dst->view_offs ;
2066+ cl_ulong offset0 = extra0->offset + src0->view_offs ;
2067+ cl_ulong offset1 = extra1->offset + src1->view_offs ;
2068+ cl_ulong offsetd = extrad->offset + dst->view_offs ;
20692069
20702070 bool bcast_row = false ;
20712071 int nb = ne00;
@@ -2081,46 +2081,46 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const
20812081 nb = ne00 / 4 ;
20822082 kernel = backend_ctx->kernel_add_row ;
20832083
2084- CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2085- CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (int ), &offset0));
2086- CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2087- CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (int ), &offset1));
2088- CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2089- CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (int ), &offsetd));
2090- CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &nb));
2084+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2085+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
2086+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2087+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
2088+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2089+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offsetd));
2090+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &nb));
20912091 } else {
20922092 kernel = backend_ctx->kernel_add ;
20932093
2094- CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2095- CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (int ), &offset0));
2096- CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2097- CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (int ), &offset1));
2098- CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2099- CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (int ), &offsetd));
2100- CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &ne00));
2101- CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (int ), &ne01));
2102- CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne02));
2103- CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne03));
2104- CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (int ), &nb00));
2105- CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (int ), &nb01));
2106- CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (int ), &nb02));
2107- CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (int ), &nb03));
2108- CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (int ), &ne10));
2109- CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (int ), &ne11));
2110- CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne12));
2111- CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne13));
2112- CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (int ), &nb10));
2113- CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (int ), &nb11));
2114- CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (int ), &nb12));
2115- CL_CHECK (clSetKernelArg (kernel, 21 , sizeof (int ), &nb13));
2116- CL_CHECK (clSetKernelArg (kernel, 22 , sizeof (int ), &ne0));
2117- CL_CHECK (clSetKernelArg (kernel, 23 , sizeof (int ), &ne1));
2118- CL_CHECK (clSetKernelArg (kernel, 24 , sizeof (int ), &ne2));
2119- CL_CHECK (clSetKernelArg (kernel, 25 , sizeof (int ), &ne3));
2120- CL_CHECK (clSetKernelArg (kernel, 26 , sizeof (int ), &nb0));
2121- CL_CHECK (clSetKernelArg (kernel, 27 , sizeof (int ), &nb1));
2122- CL_CHECK (clSetKernelArg (kernel, 28 , sizeof (int ), &nb2));
2123- CL_CHECK (clSetKernelArg (kernel, 29 , sizeof (int ), &nb3));
2094+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
2095+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
2096+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
2097+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
2098+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extrad->data_device ));
2099+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offsetd));
2100+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (int ), &ne00));
2101+ CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (int ), &ne01));
2102+ CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne02));
2103+ CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne03));
2104+ CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (cl_ulong), &nb00));
2105+ CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb01));
2106+ CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (cl_ulong), &nb02));
2107+ CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (cl_ulong), &nb03));
2108+ CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (int ), &ne10));
2109+ CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (int ), &ne11));
2110+ CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne12));
2111+ CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne13));
2112+ CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (cl_ulong), &nb10));
2113+ CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (cl_ulong), &nb11));
2114+ CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (cl_ulong), &nb12));
2115+ CL_CHECK (clSetKernelArg (kernel, 21 , sizeof (cl_ulong), &nb13));
2116+ CL_CHECK (clSetKernelArg (kernel, 22 , sizeof (int ), &ne0));
2117+ CL_CHECK (clSetKernelArg (kernel, 23 , sizeof (int ), &ne1));
2118+ CL_CHECK (clSetKernelArg (kernel, 24 , sizeof (int ), &ne2));
2119+ CL_CHECK (clSetKernelArg (kernel, 25 , sizeof (int ), &ne3));
2120+ CL_CHECK (clSetKernelArg (kernel, 26 , sizeof (cl_ulong), &nb0));
2121+ CL_CHECK (clSetKernelArg (kernel, 27 , sizeof (cl_ulong), &nb1));
2122+ CL_CHECK (clSetKernelArg (kernel, 28 , sizeof (cl_ulong), &nb2));
2123+ CL_CHECK (clSetKernelArg (kernel, 29 , sizeof (cl_ulong), &nb3));
21242124 }
21252125
21262126 if (bcast_row) {
0 commit comments