@@ -46,7 +46,7 @@ using block_q8_0x4 = block<8, 4>;
4646using block_q8_0x8 = block<8 , 8 >;
4747
4848
49- struct block_q4_Kx8 {
49+ struct block_q4_Kx8 {
5050 ggml_half d[8 ]; // super-block scale for quantized scales
5151 ggml_half dmin[8 ]; // super-block scale for quantized mins
5252 uint8_t scales[96 ]; // scales and mins, quantized with 6 bits
@@ -55,7 +55,7 @@ struct block_q4_Kx8{
5555
5656static_assert (sizeof (block_q4_Kx8) == sizeof (ggml_half) * 16 + K_SCALE_SIZE * 8 + QK_K * 4 , " wrong q4_K block size/padding" );
5757
58- struct block_q8_Kx4 {
58+ struct block_q8_Kx4 {
5959 float d[4 ]; // delta
6060 int8_t qs[QK_K * 4 ]; // quants
6161 int16_t bsums[QK_K / 4 ]; // sum of quants in groups of 16
@@ -726,7 +726,7 @@ static void quantize_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRIC
726726 __m256i one = _mm256_set1_epi8 (1 );
727727 __m256i bsums_r1 = _mm256_maddubs_epi16 (one, sb_h1_interleaved);
728728
729- for (int l = 0 ; l < 3 ; l++) {
729+ for (int l = 0 ; l < 3 ; l++) {
730730 // Quants value shifted to process next two values from each sub block
731731 q0 = _mm256_srli_epi64 (q0, 16 );
732732 q2 = _mm256_srli_epi64 (q2, 16 );
@@ -753,7 +753,7 @@ static void quantize_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRIC
753753
754754 __m256i bsums_r2 = _mm256_maddubs_epi16 (one, sb_h2_interleaved);
755755
756- for (int l = 0 ; l < 3 ; l++) {
756+ for (int l = 0 ; l < 3 ; l++) {
757757 // Quants value shifted to process next two values from each sub block
758758 q1 = _mm256_srli_epi64 (q1, 16 );
759759 q3 = _mm256_srli_epi64 (q3, 16 );
@@ -802,7 +802,7 @@ static void quantize_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRIC
802802 y[i].d [row_iter] = amax ? 1 /iscale[row_iter] : 0 ;
803803 }
804804
805- for (int j = 0 ; j < QK_K / 4 ; j++) {
805+ for (int j = 0 ; j < QK_K / 4 ; j++) {
806806 y[i].bsums [j] = 0 ;
807807 }
808808
@@ -1526,7 +1526,7 @@ static void ggml_gemv_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, c
15261526 sum_minf[j] = 0.0 ;
15271527 }
15281528 for (int l = 0 ; l < nb; l++) {
1529- for (int sb = 0 ; sb < 8 ; sb++) {
1529+ for (int sb = 0 ; sb < 8 ; sb++) {
15301530 memcpy (utmp + sb * 4 , b_ptr[l].scales + sb * 12 , 12 );
15311531 utmp[sb * 4 + 3 ] = ((utmp[sb * 4 + 2 ] >> 4 ) & kmask2) | (((utmp[sb * 4 + 1 ] >> 6 ) & kmask3) << 4 );
15321532 const uint32_t uaux_0 = utmp[sb * 4 + 1 ] & kmask1;
@@ -1553,14 +1553,14 @@ static void ggml_gemv_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, c
15531553 sumf[j] += sumi * GGML_FP16_TO_FP32 (b_ptr[l].d [j]) * a_ptr[l].d ;
15541554 }
15551555 }
1556- for (int sb = 0 ; sb < 8 ; sb++) {
1556+ for (int sb = 0 ; sb < 8 ; sb++) {
15571557 uint8_t *mins = (uint8_t *) utmp + 8 + sb * 16 ;
1558- for (int j = 0 ; j < ncols_interleaved; j++) {
1558+ for (int j = 0 ; j < ncols_interleaved; j++) {
15591559 sum_minf[j] += mins[j] * (a_ptr[l].bsums [sb * 2 ] + a_ptr[l].bsums [sb * 2 + 1 ]) * GGML_FP16_TO_FP32 (b_ptr[l].dmin [j]) * a_ptr[l].d ;
15601560 }
15611561 }
15621562 }
1563- for (int j = 0 ; j < ncols_interleaved; j++) {
1563+ for (int j = 0 ; j < ncols_interleaved; j++) {
15641564 s[x * ncols_interleaved + j] = sumf[j] - sum_minf[j];
15651565 }
15661566 }
@@ -4780,7 +4780,7 @@ static void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, c
47804780 }
47814781 }
47824782 for (int l = 0 ; l < nb; l++) {
4783- for (int sb = 0 ; sb < 8 ; sb++) {
4783+ for (int sb = 0 ; sb < 8 ; sb++) {
47844784 memcpy (utmp + sb * 4 , b_ptr[l].scales + sb * 12 , 12 );
47854785 utmp[sb * 4 + 3 ] = ((utmp[sb * 4 + 2 ] >> 4 ) & kmask2) | (((utmp[sb * 4 + 1 ] >> 6 ) & kmask3) << 4 );
47864786 const uint32_t uaux_0 = utmp[sb * 4 + 1 ] & kmask1;
@@ -4809,7 +4809,7 @@ static void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, c
48094809 }
48104810 }
48114811 }
4812- for (int sb = 0 ; sb < 8 ; sb++) {
4812+ for (int sb = 0 ; sb < 8 ; sb++) {
48134813 uint8_t *mins = (uint8_t *) utmp + 8 + sb * 16 ;
48144814 for (int m = 0 ; m < 4 ; m++) {
48154815 const int16_t *bsums = a_ptr[l].bsums + (sb * 8 ) + (m * 4 ) - ((sb % 2 ) * 6 );
@@ -5040,8 +5040,8 @@ static block_q4_Kx8 make_block_q4_Kx8(block_q4_K * in, unsigned int blck_size_in
50405040 // For eg - First 12 bytes contains 8 scales and 8 mins - each of first sub block from different Q4_K structures
50415041 uint8_t s[8 ], m[8 ];
50425042
5043- for (int i = 0 ; i < 4 ; i++) {
5044- for (int j = 0 ; j < 8 ; j++) {
5043+ for (int i = 0 ; i < 4 ; i++) {
5044+ for (int j = 0 ; j < 8 ; j++) {
50455045 s[j] = in[j].scales [i] & 63 ;
50465046 m[j] = in[j].scales [i + 4 ] & 63 ;
50475047 }
@@ -5061,8 +5061,8 @@ static block_q4_Kx8 make_block_q4_Kx8(block_q4_K * in, unsigned int blck_size_in
50615061
50625062 }
50635063
5064- for (int i = 0 ; i < 4 ; i++) {
5065- for (int j = 0 ; j < 8 ; j++) {
5064+ for (int i = 0 ; i < 4 ; i++) {
5065+ for (int j = 0 ; j < 8 ; j++) {
50665066 s[j] = ((in[j].scales [i] & 192 ) >> 2 ) | (in[j].scales [i+8 ] & 15 );
50675067 m[j] = ((in[j].scales [i + 4 ] & 192 ) >> 2 ) | ((in[j].scales [i+8 ] & 240 ) >> 4 );
50685068 }
0 commit comments