@@ -2394,9 +2394,9 @@ void kernel_mul_mv_impl(
23942394 sumf += (T0) x[i] * (T1) y[i];
23952395 }
23962396
2397- float all_sum = simd_sum (sumf);
2397+ float sum_all = simd_sum (sumf);
23982398 if (tiisg == 0 ) {
2399- dst_f32[(uint64_t )r1*args.ne0 + r0] = all_sum ;
2399+ dst_f32[(uint64_t )r1*args.ne0 + r0] = sum_all ;
24002400 }
24012401 }
24022402 } else {
@@ -2417,10 +2417,10 @@ void kernel_mul_mv_impl(
24172417 sumf += dot ((float4) x4[i], (float4) y4[i]);
24182418 }
24192419
2420- float all_sum = simd_sum (sumf);
2420+ float sum_all = simd_sum (sumf);
24212421 if (tiisg == 0 ) {
2422- for (int i = 4 *(args.ne00 /4 ); i < args.ne00 ; ++i) all_sum += (float ) (x[i] * y[i]);
2423- dst_f32[(uint64_t )r1*args.ne0 + r0] = all_sum ;
2422+ for (int i = 4 *(args.ne00 /4 ); i < args.ne00 ; ++i) sum_all += (float ) (x[i] * y[i]);
2423+ dst_f32[(uint64_t )r1*args.ne0 + r0] = sum_all ;
24242424 }
24252425 }
24262426 }
@@ -2482,9 +2482,9 @@ kernel void kernel_mul_mv_1row(
24822482 for (int i = tiisg; i < args.ne00 ; i += 32 ) {
24832483 sumf += (float ) x[i] * (float ) y[i];
24842484 }
2485- float all_sum = simd_sum (sumf);
2485+ float sum_all = simd_sum (sumf);
24862486 if (tiisg == 0 ) {
2487- dst_f32[r0] = all_sum ;
2487+ dst_f32[r0] = sum_all ;
24882488 }
24892489 } else {
24902490 device const T4 * x4 = (device const T4 *) x;
@@ -2494,11 +2494,11 @@ kernel void kernel_mul_mv_1row(
24942494 sumf += dot ((float4) x4[i], y4[i]);
24952495 }
24962496
2497- float all_sum = simd_sum (sumf);
2497+ float sum_all = simd_sum (sumf);
24982498
24992499 if (tiisg == 0 ) {
2500- for (int i = 4 *(args.ne00 /4 ); i < args.ne00 ; ++i) all_sum += (float ) (x[i] * y[i]);
2501- dst_f32[r0] = all_sum ;
2500+ for (int i = 4 *(args.ne00 /4 ); i < args.ne00 ; ++i) sum_all += (float ) (x[i] * y[i]);
2501+ dst_f32[r0] = sum_all ;
25022502 }
25032503 }
25042504}
@@ -2543,9 +2543,9 @@ kernel void kernel_mul_mv_l4(
25432543 sumf += dot ((float4) x4[i], y4[i]);
25442544 }
25452545
2546- float all_sum = simd_sum (sumf);
2546+ float sum_all = simd_sum (sumf);
25472547 if (tiisg == 0 ) {
2548- dst_f32[(uint64_t )r1*args.ne0 + r0] = all_sum ;
2548+ dst_f32[(uint64_t )r1*args.ne0 + r0] = sum_all ;
25492549 }
25502550 }
25512551}
@@ -4447,7 +4447,7 @@ void kernel_mul_mv_q2_K_f32_impl(
44474447 device const float * y = (device const float *) (src1 + offset1);
44484448
44494449 float yl[32 ];
4450- float sumf[nr0]={0 .f }, all_sum ;
4450+ float sumf[nr0]={0 .f };
44514451
44524452 const int ix = tiisg/8 ; // 0...3
44534453 const int it = tiisg%8 ; // 0...7
@@ -4503,9 +4503,9 @@ void kernel_mul_mv_q2_K_f32_impl(
45034503 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
45044504
45054505 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
4506- all_sum = simd_sum (sumf[row]);
4506+ float sum_all = simd_sum (sumf[row]);
45074507 if (tiisg == 0 ) {
4508- dst_f32[first_row + row] = all_sum ;
4508+ dst_f32[first_row + row] = sum_all ;
45094509 }
45104510 }
45114511}
@@ -4727,7 +4727,7 @@ void kernel_mul_mv_q4_K_f32_impl(
47274727 float yl[16 ];
47284728 float yh[16 ];
47294729
4730- float sumf[N_R0_Q4_K]={0 .f }, all_sum ;
4730+ float sumf[N_R0_Q4_K]={0 .f };
47314731
47324732 device const float * y4 = y + ix * QK_K + 64 * iq + 8 * ir;
47334733
@@ -4793,9 +4793,9 @@ void kernel_mul_mv_q4_K_f32_impl(
47934793 device float * dst_f32 = (device float *) dst + (int64_t )im*args.ne0 *args.ne1 + (int64_t )r1*args.ne0 ;
47944794
47954795 for (int row = 0 ; row < N_R0_Q4_K && first_row + row < args.ne0 ; ++row) {
4796- all_sum = simd_sum (sumf[row]);
4796+ float sum_all = simd_sum (sumf[row]);
47974797 if (tiisg == 0 ) {
4798- dst_f32[first_row + row] = all_sum ;
4798+ dst_f32[first_row + row] = sum_all ;
47994799 }
48004800 }
48014801}
@@ -4981,7 +4981,6 @@ void kernel_mul_mv_q6_K_f32_impl(
49814981 // TODO: support nr0 > 1
49824982 static_assert (nr0 == 1 , " nr0 > 1 not supported" );
49834983 float sumf[1 ] = { 0 .f };
4984- float all_sum;
49854984
49864985 const short tid = tiisg/2 ;
49874986 const short ix = tiisg%2 ;
@@ -5020,9 +5019,9 @@ void kernel_mul_mv_q6_K_f32_impl(
50205019 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
50215020
50225021 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5023- all_sum = simd_sum (sumf[row]);
5022+ float sum_all = simd_sum (sumf[row]);
50245023 if (tiisg == 0 ) {
5025- dst_f32[first_row + row] = all_sum ;
5024+ dst_f32[first_row + row] = sum_all ;
50265025 }
50275026 }
50285027}
@@ -5070,7 +5069,7 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
50705069 device const float * y = (device const float *) (src1 + offset1);
50715070
50725071 float yl[32 ];
5073- float sumf[nr0]={0 .f }, all_sum ;
5072+ float sumf[nr0]={0 .f };
50745073
50755074 const int nb32 = nb * (QK_K / 32 );
50765075
@@ -5130,9 +5129,9 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
51305129 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
51315130
51325131 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5133- all_sum = simd_sum (sumf[row]);
5132+ float sum_all = simd_sum (sumf[row]);
51345133 if (tiisg == 0 ) {
5135- dst_f32[first_row + row] = all_sum * 0 .25f ;
5134+ dst_f32[first_row + row] = sum_all * 0 .25f ;
51365135 }
51375136 }
51385137}
@@ -5178,7 +5177,7 @@ void kernel_mul_mv_iq2_xs_f32_impl(
51785177 device const float * y = (device const float *) (src1 + offset1);
51795178
51805179 float yl[32 ];
5181- float sumf[nr0]={0 .f }, all_sum ;
5180+ float sumf[nr0]={0 .f };
51825181
51835182 const int nb32 = nb * (QK_K / 32 );
51845183
@@ -5248,9 +5247,9 @@ void kernel_mul_mv_iq2_xs_f32_impl(
52485247 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
52495248
52505249 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5251- all_sum = simd_sum (sumf[row]);
5250+ float sum_all = simd_sum (sumf[row]);
52525251 if (tiisg == 0 ) {
5253- dst_f32[first_row + row] = all_sum * 0 .25f ;
5252+ dst_f32[first_row + row] = sum_all * 0 .25f ;
52545253 }
52555254 }
52565255}
@@ -5297,7 +5296,7 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
52975296 device const float * y = (device const float *) (src1 + offset1);
52985297
52995298 float yl[32 ];
5300- float sumf[nr0]={0 .f }, all_sum ;
5299+ float sumf[nr0]={0 .f };
53015300
53025301 const int nb32 = nb * (QK_K / 32 );
53035302
@@ -5358,9 +5357,9 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
53585357 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
53595358
53605359 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5361- all_sum = simd_sum (sumf[row]);
5360+ float sum_all = simd_sum (sumf[row]);
53625361 if (tiisg == 0 ) {
5363- dst_f32[first_row + row] = all_sum * 0 .5f ;
5362+ dst_f32[first_row + row] = sum_all * 0 .5f ;
53645363 }
53655364 }
53665365}
@@ -5407,7 +5406,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
54075406 device const float * y = (device const float *) (src1 + offset1);
54085407
54095408 float yl[32 ];
5410- float sumf[nr0]={0 .f }, all_sum ;
5409+ float sumf[nr0]={0 .f };
54115410
54125411 const int nb32 = nb * (QK_K / 32 );
54135412
@@ -5470,9 +5469,9 @@ void kernel_mul_mv_iq3_s_f32_impl(
54705469 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
54715470
54725471 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5473- all_sum = simd_sum (sumf[row]);
5472+ float sum_all = simd_sum (sumf[row]);
54745473 if (tiisg == 0 ) {
5475- dst_f32[first_row + row] = all_sum ;
5474+ dst_f32[first_row + row] = sum_all ;
54765475 }
54775476 }
54785477}
@@ -5519,7 +5518,7 @@ void kernel_mul_mv_iq2_s_f32_impl(
55195518 device const float * y = (device const float *) (src1 + offset1);
55205519
55215520 float yl[32 ];
5522- float sumf[nr0]={0 .f }, all_sum ;
5521+ float sumf[nr0]={0 .f };
55235522
55245523 const int nb32 = nb * (QK_K / 32 );
55255524
@@ -5583,9 +5582,9 @@ void kernel_mul_mv_iq2_s_f32_impl(
55835582 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
55845583
55855584 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5586- all_sum = simd_sum (sumf[row]);
5585+ float sum_all = simd_sum (sumf[row]);
55875586 if (tiisg == 0 ) {
5588- dst_f32[first_row + row] = all_sum * 0 .25f ;
5587+ dst_f32[first_row + row] = sum_all * 0 .25f ;
55895588 }
55905589 }
55915590}
@@ -5632,7 +5631,7 @@ void kernel_mul_mv_iq1_s_f32_impl(
56325631 device const float * y = (device const float *) (src1 + offset1);
56335632
56345633 float yl[32 ];
5635- float sumf[nr0]={0 .f }, all_sum ;
5634+ float sumf[nr0]={0 .f };
56365635
56375636 const int nb32 = nb * (QK_K / 32 );
56385637
@@ -5683,9 +5682,9 @@ void kernel_mul_mv_iq1_s_f32_impl(
56835682 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
56845683
56855684 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5686- all_sum = simd_sum (sumf[row]);
5685+ float sum_all = simd_sum (sumf[row]);
56875686 if (tiisg == 0 ) {
5688- dst_f32[first_row + row] = all_sum ;
5687+ dst_f32[first_row + row] = sum_all ;
56895688 }
56905689 }
56915690}
@@ -5732,7 +5731,7 @@ void kernel_mul_mv_iq1_m_f32_impl(
57325731 device const float * y = (device const float *) (src1 + offset1);
57335732
57345733 float yl[32 ];
5735- float sumf[nr0]={0 .f }, all_sum ;
5734+ float sumf[nr0]={0 .f };
57365735
57375736 const int nb32 = nb * (QK_K / 32 );
57385737
@@ -5792,9 +5791,9 @@ void kernel_mul_mv_iq1_m_f32_impl(
57925791 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
57935792
57945793 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5795- all_sum = simd_sum (sumf[row]);
5794+ float sum_all = simd_sum (sumf[row]);
57965795 if (tiisg == 0 ) {
5797- dst_f32[first_row + row] = all_sum ;
5796+ dst_f32[first_row + row] = sum_all ;
57985797 }
57995798 }
58005799}
@@ -5848,7 +5847,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
58485847 threadgroup_barrier (mem_flags::mem_threadgroup);
58495848
58505849 float4 yl[4 ];
5851- float sumf[nr0]={0 .f }, all_sum ;
5850+ float sumf[nr0]={0 .f };
58525851
58535852 device const float * yb = y + ix * QK4_NL + it * 8 ;
58545853
@@ -5897,9 +5896,9 @@ void kernel_mul_mv_iq4_nl_f32_impl(
58975896 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
58985897
58995898 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
5900- all_sum = simd_sum (sumf[row]);
5899+ float sum_all = simd_sum (sumf[row]);
59015900 if (tiisg == 0 ) {
5902- dst_f32[first_row + row] = all_sum ;
5901+ dst_f32[first_row + row] = sum_all ;
59035902 }
59045903 }
59055904}
@@ -5954,7 +5953,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
59545953 threadgroup_barrier (mem_flags::mem_threadgroup);
59555954
59565955 float4 yl[4 ];
5957- float sumf[nr0]={0 .f }, all_sum ;
5956+ float sumf[nr0]={0 .f };
59585957
59595958 device const float * yb = y + ix * QK_K + ib * 32 + il * 8 ;
59605959
@@ -6000,9 +5999,9 @@ void kernel_mul_mv_iq4_xs_f32_impl(
60005999 device float * dst_f32 = (device float *) dst + (uint64_t )im*args.ne0 *args.ne1 + (uint64_t )r1*args.ne0 ;
60016000
60026001 for (int row = 0 ; row < nr0 && first_row + row < args.ne0 ; ++row) {
6003- all_sum = simd_sum (sumf[row]);
6002+ float sum_all = simd_sum (sumf[row]);
60046003 if (tiisg == 0 ) {
6005- dst_f32[first_row + row] = all_sum ;
6004+ dst_f32[first_row + row] = sum_all ;
60066005 }
60076006 }
60086007}
0 commit comments