@@ -28,7 +28,7 @@ static void ggml_compute_forward_win_part_f16(
     for (int64_t i3 = 0; i3 < ne3; i3++) {
         int px = i3 % nep0;
         int py = (i3 / nep0) % nep1;
-        int b = i3 / (nep0 * nep1);
+        int b = i3 / (nep0 * nep1);
         for (int64_t i2 = 0; i2 < ne2; ++i2) {
             for (int64_t i1 = 0; i1 < ne1; ++i1) {
                 for (int64_t i0 = 0; i0 < ne0; ++i0) {
@@ -38,7 +38,7 @@ static void ggml_compute_forward_win_part_f16(
                     const int64_t i00 = i0;

                     void * sp = ((void *) src0->data) + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00;
-                    void * dp = ((void *) dst->data) + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0;
+                    void * dp = ((void *) dst->data) + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0;

                     if (py*w + i2 >= ne02 || px*w + i1 >= ne01) {
                         *((ggml_fp16_t *) dp) = 0;
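For reference, here is a minimal standalone sketch of the index mapping these two hunks implement, assuming a single batch, a row-major [H x W x C] source layout, square w x w windows, and the nep0/nep1 window counts used above (the layout and the helper names are assumptions, not part of the commit). Each element (i2, i1, i0) of window i3 reads source position (py*w + i2, px*w + i1, i0), and positions past the source bounds are zero-padded, matching the `*((ggml_fp16_t *) dp) = 0` branch above.

#include <stdio.h>

/* Sketch only: partition a row-major [H x W x C] buffer into
 * nep1 x nep0 windows of size w x w, zero-padding out-of-range cells. */
static void win_part_sketch(const float * src, float * dst,
                            int H, int W, int C, int w, int nep0, int nep1) {
    for (int i3 = 0; i3 < nep0*nep1; i3++) {       // window index
        const int px = i3 % nep0;                  // window column
        const int py = (i3 / nep0) % nep1;         // window row
        for (int i2 = 0; i2 < w; i2++) {           // row inside the window
            for (int i1 = 0; i1 < w; i1++) {       // column inside the window
                for (int i0 = 0; i0 < C; i0++) {   // channel
                    const int sy = py*w + i2;
                    const int sx = px*w + i1;
                    float v = 0.0f;                // zero padding, as in the hunks above
                    if (sy < H && sx < W) {
                        v = src[(sy*W + sx)*C + i0];
                    }
                    dst[((i3*w + i2)*w + i1)*C + i0] = v;
                }
            }
        }
    }
}

int main(void) {
    enum { H = 5, W = 5, C = 1, w = 4, nep0 = (W + w - 1)/w, nep1 = (H + w - 1)/w };
    float src[H*W*C], dst[nep0*nep1*w*w*C];
    for (int i = 0; i < H*W*C; i++) src[i] = (float) i;
    win_part_sketch(src, dst, H, W, C, w, nep0, nep1);
    printf("partitioned into %d x %d windows of %d x %d\n", nep1, nep0, w, w);
    return 0;
}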
@@ -138,7 +138,7 @@ __global__ static void win_part_kernel(
     if (py*p.w + i2 >= p.ne2 || px*p.w + i1 >= p.ne1) {
         for (int i0 = threadIdx.x; i0 < p.C; i0 += blockDim.x) {
             char * dp = (char *)dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0;
-            *((T *) dp) = 0;
+            *((T *) dp) = 0.0;
         }
         return;
     }
@@ -210,7 +210,7 @@ static unsigned int round_to_pow2(unsigned int v) {
     v++;

     return v;
-}
+}

 void ggml_cuda_op_win_part(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     const ggml_tensor * src0 = dst->src[0];
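Only the tail of `round_to_pow2` is visible in this hunk; the body is presumably the standard bit-smearing round-up-to-the-next-power-of-two, likely used to pick a power-of-two thread count for the kernel launch. A reconstruction of the full helper, offered as an assumption rather than the commit's exact code:

// Assumed full form; only "v++; return v;" appears in the diff above.
static unsigned int round_to_pow2(unsigned int v) {
    v--;            // so exact powers of two map to themselves
    v |= v >> 1;    // smear the highest set bit downwards...
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;   // ...until all lower bits are set
    v++;            // next power of two

    return v;
}
// e.g. round_to_pow2(1) == 1, round_to_pow2(5) == 8, round_to_pow2(64) == 64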
@@ -297,12 +297,12 @@ static void ggml_compute_forward_win_unpart_f16(
                 for (int64_t i0 = 0; i0 < ne0; ++i0) {
                     const int ip2 = i2/w;
                     const int ip1 = i1/w;
-
+
                     const int64_t i03 = i3*npx*npy + ip2*npx + ip1;
                     const int64_t i02 = i2%w;
                     const int64_t i01 = i1%w;
                     const int64_t i00 = i0;
-
+
                     void * sp = ((void *) src0->data) + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00;
                     void * dp = ((void *) dst->data) + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0;
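And a matching sketch of the inverse mapping in this last hunk, under the same assumed single-batch, row-major layout as the win_part sketch above: destination pixel (i2, i1) reads from window ip2*npx + ip1 at offset (i2 % w, i1 % w), so the zero-padded cells written by win_part are simply never read back.

/* Sketch only: undo the window partition above for a single batch
 * (the diff's i3*npx*npy term handles multiple batches, omitted here). */
void win_unpart_sketch(const float * src, float * dst,
                       int H, int W, int C, int w, int npx) {
    for (int i2 = 0; i2 < H; i2++) {               // destination row
        for (int i1 = 0; i1 < W; i1++) {           // destination column
            for (int i0 = 0; i0 < C; i0++) {       // channel
                const int ip2 = i2 / w;            // window row
                const int ip1 = i1 / w;            // window column
                const int i03 = ip2*npx + ip1;     // window index
                const int i02 = i2 % w;            // row inside the window
                const int i01 = i1 % w;            // column inside the window
                dst[(i2*W + i1)*C + i0] = src[((i03*w + i02)*w + i01)*C + i0];
            }
        }
    }
}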