Skip to content

Commit 24a9ea8

Browse files
committed
metal : rename all_sum -> sum_all
ggml-ci
1 parent: fcca45c · commit: 24a9ea8

File tree

1 file changed

+47 additions, -48 deletions

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 47 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -2394,9 +2394,9 @@ void kernel_mul_mv_impl(
23942394
sumf += (T0) x[i] * (T1) y[i];
23952395
}
23962396

2397-
float all_sum = simd_sum(sumf);
2397+
float sum_all = simd_sum(sumf);
23982398
if (tiisg == 0) {
2399-
dst_f32[(uint64_t)r1*args.ne0 + r0] = all_sum;
2399+
dst_f32[(uint64_t)r1*args.ne0 + r0] = sum_all;
24002400
}
24012401
}
24022402
} else {
@@ -2417,10 +2417,10 @@ void kernel_mul_mv_impl(
24172417
sumf += dot((float4) x4[i], (float4) y4[i]);
24182418
}
24192419

2420-
float all_sum = simd_sum(sumf);
2420+
float sum_all = simd_sum(sumf);
24212421
if (tiisg == 0) {
2422-
for (int i = 4*(args.ne00/4); i < args.ne00; ++i) all_sum += (float) (x[i] * y[i]);
2423-
dst_f32[(uint64_t)r1*args.ne0 + r0] = all_sum;
2422+
for (int i = 4*(args.ne00/4); i < args.ne00; ++i) sum_all += (float) (x[i] * y[i]);
2423+
dst_f32[(uint64_t)r1*args.ne0 + r0] = sum_all;
24242424
}
24252425
}
24262426
}
@@ -2482,9 +2482,9 @@ kernel void kernel_mul_mv_1row(
24822482
for (int i = tiisg; i < args.ne00; i += 32) {
24832483
sumf += (float) x[i] * (float) y[i];
24842484
}
2485-
float all_sum = simd_sum(sumf);
2485+
float sum_all = simd_sum(sumf);
24862486
if (tiisg == 0) {
2487-
dst_f32[r0] = all_sum;
2487+
dst_f32[r0] = sum_all;
24882488
}
24892489
} else {
24902490
device const T4 * x4 = (device const T4 *) x;
@@ -2494,11 +2494,11 @@ kernel void kernel_mul_mv_1row(
24942494
sumf += dot((float4) x4[i], y4[i]);
24952495
}
24962496

2497-
float all_sum = simd_sum(sumf);
2497+
float sum_all = simd_sum(sumf);
24982498

24992499
if (tiisg == 0) {
2500-
for (int i = 4*(args.ne00/4); i < args.ne00; ++i) all_sum += (float) (x[i] * y[i]);
2501-
dst_f32[r0] = all_sum;
2500+
for (int i = 4*(args.ne00/4); i < args.ne00; ++i) sum_all += (float) (x[i] * y[i]);
2501+
dst_f32[r0] = sum_all;
25022502
}
25032503
}
25042504
}
@@ -2543,9 +2543,9 @@ kernel void kernel_mul_mv_l4(
25432543
sumf += dot((float4) x4[i], y4[i]);
25442544
}
25452545

2546-
float all_sum = simd_sum(sumf);
2546+
float sum_all = simd_sum(sumf);
25472547
if (tiisg == 0) {
2548-
dst_f32[(uint64_t)r1*args.ne0 + r0] = all_sum;
2548+
dst_f32[(uint64_t)r1*args.ne0 + r0] = sum_all;
25492549
}
25502550
}
25512551
}
@@ -4447,7 +4447,7 @@ void kernel_mul_mv_q2_K_f32_impl(
44474447
device const float * y = (device const float *) (src1 + offset1);
44484448

44494449
float yl[32];
4450-
float sumf[nr0]={0.f}, all_sum;
4450+
float sumf[nr0]={0.f};
44514451

44524452
const int ix = tiisg/8; // 0...3
44534453
const int it = tiisg%8; // 0...7
@@ -4503,9 +4503,9 @@ void kernel_mul_mv_q2_K_f32_impl(
45034503
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
45044504

45054505
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
4506-
all_sum = simd_sum(sumf[row]);
4506+
float sum_all = simd_sum(sumf[row]);
45074507
if (tiisg == 0) {
4508-
dst_f32[first_row + row] = all_sum;
4508+
dst_f32[first_row + row] = sum_all;
45094509
}
45104510
}
45114511
}
@@ -4727,7 +4727,7 @@ void kernel_mul_mv_q4_K_f32_impl(
47274727
float yl[16];
47284728
float yh[16];
47294729

4730-
float sumf[N_R0_Q4_K]={0.f}, all_sum;
4730+
float sumf[N_R0_Q4_K]={0.f};
47314731

47324732
device const float * y4 = y + ix * QK_K + 64 * iq + 8 * ir;
47334733

@@ -4793,9 +4793,9 @@ void kernel_mul_mv_q4_K_f32_impl(
47934793
device float * dst_f32 = (device float *) dst + (int64_t)im*args.ne0*args.ne1 + (int64_t)r1*args.ne0;
47944794

47954795
for (int row = 0; row < N_R0_Q4_K && first_row + row < args.ne0; ++row) {
4796-
all_sum = simd_sum(sumf[row]);
4796+
float sum_all = simd_sum(sumf[row]);
47974797
if (tiisg == 0) {
4798-
dst_f32[first_row + row] = all_sum;
4798+
dst_f32[first_row + row] = sum_all;
47994799
}
48004800
}
48014801
}
@@ -4981,7 +4981,6 @@ void kernel_mul_mv_q6_K_f32_impl(
49814981
// TODO: support nr0 > 1
49824982
static_assert(nr0 == 1, "nr0 > 1 not supported");
49834983
float sumf[1] = { 0.f };
4984-
float all_sum;
49854984

49864985
const short tid = tiisg/2;
49874986
const short ix = tiisg%2;
@@ -5020,9 +5019,9 @@ void kernel_mul_mv_q6_K_f32_impl(
50205019
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
50215020

50225021
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5023-
all_sum = simd_sum(sumf[row]);
5022+
float sum_all = simd_sum(sumf[row]);
50245023
if (tiisg == 0) {
5025-
dst_f32[first_row + row] = all_sum;
5024+
dst_f32[first_row + row] = sum_all;
50265025
}
50275026
}
50285027
}
@@ -5070,7 +5069,7 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
50705069
device const float * y = (device const float *) (src1 + offset1);
50715070

50725071
float yl[32];
5073-
float sumf[nr0]={0.f}, all_sum;
5072+
float sumf[nr0]={0.f};
50745073

50755074
const int nb32 = nb * (QK_K / 32);
50765075

@@ -5130,9 +5129,9 @@ void kernel_mul_mv_iq2_xxs_f32_impl(
51305129
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
51315130

51325131
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5133-
all_sum = simd_sum(sumf[row]);
5132+
float sum_all = simd_sum(sumf[row]);
51345133
if (tiisg == 0) {
5135-
dst_f32[first_row + row] = all_sum * 0.25f;
5134+
dst_f32[first_row + row] = sum_all * 0.25f;
51365135
}
51375136
}
51385137
}
@@ -5178,7 +5177,7 @@ void kernel_mul_mv_iq2_xs_f32_impl(
51785177
device const float * y = (device const float *) (src1 + offset1);
51795178

51805179
float yl[32];
5181-
float sumf[nr0]={0.f}, all_sum;
5180+
float sumf[nr0]={0.f};
51825181

51835182
const int nb32 = nb * (QK_K / 32);
51845183

@@ -5248,9 +5247,9 @@ void kernel_mul_mv_iq2_xs_f32_impl(
52485247
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
52495248

52505249
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5251-
all_sum = simd_sum(sumf[row]);
5250+
float sum_all = simd_sum(sumf[row]);
52525251
if (tiisg == 0) {
5253-
dst_f32[first_row + row] = all_sum * 0.25f;
5252+
dst_f32[first_row + row] = sum_all * 0.25f;
52545253
}
52555254
}
52565255
}
@@ -5297,7 +5296,7 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
52975296
device const float * y = (device const float *) (src1 + offset1);
52985297

52995298
float yl[32];
5300-
float sumf[nr0]={0.f}, all_sum;
5299+
float sumf[nr0]={0.f};
53015300

53025301
const int nb32 = nb * (QK_K / 32);
53035302

@@ -5358,9 +5357,9 @@ void kernel_mul_mv_iq3_xxs_f32_impl(
53585357
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
53595358

53605359
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5361-
all_sum = simd_sum(sumf[row]);
5360+
float sum_all = simd_sum(sumf[row]);
53625361
if (tiisg == 0) {
5363-
dst_f32[first_row + row] = all_sum * 0.5f;
5362+
dst_f32[first_row + row] = sum_all * 0.5f;
53645363
}
53655364
}
53665365
}
@@ -5407,7 +5406,7 @@ void kernel_mul_mv_iq3_s_f32_impl(
54075406
device const float * y = (device const float *) (src1 + offset1);
54085407

54095408
float yl[32];
5410-
float sumf[nr0]={0.f}, all_sum;
5409+
float sumf[nr0]={0.f};
54115410

54125411
const int nb32 = nb * (QK_K / 32);
54135412

@@ -5470,9 +5469,9 @@ void kernel_mul_mv_iq3_s_f32_impl(
54705469
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
54715470

54725471
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5473-
all_sum = simd_sum(sumf[row]);
5472+
float sum_all = simd_sum(sumf[row]);
54745473
if (tiisg == 0) {
5475-
dst_f32[first_row + row] = all_sum;
5474+
dst_f32[first_row + row] = sum_all;
54765475
}
54775476
}
54785477
}
@@ -5519,7 +5518,7 @@ void kernel_mul_mv_iq2_s_f32_impl(
55195518
device const float * y = (device const float *) (src1 + offset1);
55205519

55215520
float yl[32];
5522-
float sumf[nr0]={0.f}, all_sum;
5521+
float sumf[nr0]={0.f};
55235522

55245523
const int nb32 = nb * (QK_K / 32);
55255524

@@ -5583,9 +5582,9 @@ void kernel_mul_mv_iq2_s_f32_impl(
55835582
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
55845583

55855584
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5586-
all_sum = simd_sum(sumf[row]);
5585+
float sum_all = simd_sum(sumf[row]);
55875586
if (tiisg == 0) {
5588-
dst_f32[first_row + row] = all_sum * 0.25f;
5587+
dst_f32[first_row + row] = sum_all * 0.25f;
55895588
}
55905589
}
55915590
}
@@ -5632,7 +5631,7 @@ void kernel_mul_mv_iq1_s_f32_impl(
56325631
device const float * y = (device const float *) (src1 + offset1);
56335632

56345633
float yl[32];
5635-
float sumf[nr0]={0.f}, all_sum;
5634+
float sumf[nr0]={0.f};
56365635

56375636
const int nb32 = nb * (QK_K / 32);
56385637

@@ -5683,9 +5682,9 @@ void kernel_mul_mv_iq1_s_f32_impl(
56835682
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
56845683

56855684
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5686-
all_sum = simd_sum(sumf[row]);
5685+
float sum_all = simd_sum(sumf[row]);
56875686
if (tiisg == 0) {
5688-
dst_f32[first_row + row] = all_sum;
5687+
dst_f32[first_row + row] = sum_all;
56895688
}
56905689
}
56915690
}
@@ -5732,7 +5731,7 @@ void kernel_mul_mv_iq1_m_f32_impl(
57325731
device const float * y = (device const float *) (src1 + offset1);
57335732

57345733
float yl[32];
5735-
float sumf[nr0]={0.f}, all_sum;
5734+
float sumf[nr0]={0.f};
57365735

57375736
const int nb32 = nb * (QK_K / 32);
57385737

@@ -5792,9 +5791,9 @@ void kernel_mul_mv_iq1_m_f32_impl(
57925791
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
57935792

57945793
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5795-
all_sum = simd_sum(sumf[row]);
5794+
float sum_all = simd_sum(sumf[row]);
57965795
if (tiisg == 0) {
5797-
dst_f32[first_row + row] = all_sum;
5796+
dst_f32[first_row + row] = sum_all;
57985797
}
57995798
}
58005799
}
@@ -5848,7 +5847,7 @@ void kernel_mul_mv_iq4_nl_f32_impl(
58485847
threadgroup_barrier(mem_flags::mem_threadgroup);
58495848

58505849
float4 yl[4];
5851-
float sumf[nr0]={0.f}, all_sum;
5850+
float sumf[nr0]={0.f};
58525851

58535852
device const float * yb = y + ix * QK4_NL + it * 8;
58545853

@@ -5897,9 +5896,9 @@ void kernel_mul_mv_iq4_nl_f32_impl(
58975896
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
58985897

58995898
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
5900-
all_sum = simd_sum(sumf[row]);
5899+
float sum_all = simd_sum(sumf[row]);
59015900
if (tiisg == 0) {
5902-
dst_f32[first_row + row] = all_sum;
5901+
dst_f32[first_row + row] = sum_all;
59035902
}
59045903
}
59055904
}
@@ -5954,7 +5953,7 @@ void kernel_mul_mv_iq4_xs_f32_impl(
59545953
threadgroup_barrier(mem_flags::mem_threadgroup);
59555954

59565955
float4 yl[4];
5957-
float sumf[nr0]={0.f}, all_sum;
5956+
float sumf[nr0]={0.f};
59585957

59595958
device const float * yb = y + ix * QK_K + ib * 32 + il * 8;
59605959

@@ -6000,9 +5999,9 @@ void kernel_mul_mv_iq4_xs_f32_impl(
60005999
device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1 + (uint64_t)r1*args.ne0;
60016000

60026001
for (int row = 0; row < nr0 && first_row + row < args.ne0; ++row) {
6003-
all_sum = simd_sum(sumf[row]);
6002+
float sum_all = simd_sum(sumf[row]);
60046003
if (tiisg == 0) {
6005-
dst_f32[first_row + row] = all_sum;
6004+
dst_f32[first_row + row] = sum_all;
60066005
}
60076006
}
60086007
}

0 commit comments

Comments
 (0)