Skip to content

Commit 6426ad5

Browse files
committed
Initial CI/CD fixes
1 parent c2c53bc commit 6426ad5

File tree

2 files changed

+16
-20
lines changed

2 files changed

+16
-20
lines changed

ggml/src/ggml-cpu/arch/x86/repack.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3957,7 +3957,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
39573957
const __m512i mins_23 = _mm512_cvtepu8_epi16(_mm256_and_si256(_mm256_srli_epi16(mins_and_scales_23, 4), m4b));
39583958
const __m512i mins_45 = _mm512_cvtepu8_epi16(_mm256_and_si256(_mm256_srli_epi16(mins_and_scales_45, 4), m4b));
39593959
const __m512i mins_67 = _mm512_cvtepu8_epi16(_mm256_and_si256(_mm256_srli_epi16(mins_and_scales_67, 4), m4b));
3960-
3960+
39613961
const __m512i scales_0 = _mm512_cvtepu8_epi16(_mm256_shuffle_epi8(scales_01,scalesmask1));
39623962
const __m512i scales_1 = _mm512_cvtepu8_epi16(_mm256_shuffle_epi8(scales_01,scalesmask2));
39633963
const __m512i scales_2 = _mm512_cvtepu8_epi16(_mm256_shuffle_epi8(scales_23,scalesmask1));
@@ -4102,7 +4102,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
41024102
__m512i lhs_bsums_23_4567 = _mm512_inserti32x8(_mm512_castsi256_si512(lhs_bsums_ymm_23_4567), lhs_bsums_ymm_23_4567, 1);
41034103

41044104
// Shuffle pattern one - left side input
4105-
const __m512i lhs_mat_01_00_sp1 = _mm512_shuffle_epi32(lhs_mat_01_00, (_MM_PERM_ENUM)160); //A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3)
4105+
const __m512i lhs_mat_01_00_sp1 = _mm512_shuffle_epi32(lhs_mat_01_00, (_MM_PERM_ENUM)160); //A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3)
41064106
const __m512i lhs_mat_23_00_sp1 = _mm512_shuffle_epi32(lhs_mat_23_00, (_MM_PERM_ENUM)160); //A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3)
41074107

41084108
const __m512i lhs_mat_01_01_sp1 = _mm512_shuffle_epi32(lhs_mat_01_01, (_MM_PERM_ENUM)160); //A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11)
@@ -4831,7 +4831,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
48314831
__m512i lhs_bsums_23_4567 = _mm512_inserti32x8(_mm512_castsi256_si512(lhs_bsums_ymm_23_4567), lhs_bsums_ymm_23_4567, 1);
48324832

48334833
// Shuffle pattern one - left side input
4834-
const __m512i lhs_mat_01_00_sp1 = _mm512_shuffle_epi32(lhs_mat_01_00, (_MM_PERM_ENUM)160); //A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3)
4834+
const __m512i lhs_mat_01_00_sp1 = _mm512_shuffle_epi32(lhs_mat_01_00, (_MM_PERM_ENUM)160); //A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3) A00(0-3) A00(0-3) A01(0-3) A01(0-3)
48354835
const __m512i lhs_mat_23_00_sp1 = _mm512_shuffle_epi32(lhs_mat_23_00, (_MM_PERM_ENUM)160); //A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3) A02(0-3) A02(0-3) A03(0-3) A03(0-3)
48364836

48374837
const __m512i lhs_mat_01_01_sp1 = _mm512_shuffle_epi32(lhs_mat_01_01, (_MM_PERM_ENUM)160); //A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11) A00(8-11) A00(8-11) A01(8-11) A01(8-11)
@@ -6522,7 +6522,7 @@ void ggml_gemm_q2_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
65226522
for (int i = 0; i < 4; i++) {
65236523
_mm256_storeu_ps((float * )(s + ((y * 4 + i) * bs + x * 8)), _mm256_sub_ps(acc_rows[i], acc_min_rows[i]));
65246524
}
6525-
}
6525+
}
65266526
}
65276527
#else
65286528

ggml/src/ggml-cpu/repack.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -446,10 +446,10 @@ void ggml_gemv_q2_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
446446
}
447447
for (int l = 0; l < nb; l++) {
448448
for (int k = 0; k < (qk / (4 * blocklen)); k++) {
449-
uint8_t *scales_0 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 ;
450-
uint8_t *scales_1 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 16;
451-
uint8_t *scales_2 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 32;
452-
uint8_t *scales_3 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 48;
449+
const uint8_t *scales_0 = b_ptr[l].scales + (k / 4) * 64 ;
450+
const uint8_t *scales_1 = b_ptr[l].scales + (k / 4) * 64 + 16;
451+
const uint8_t *scales_2 = b_ptr[l].scales + (k / 4) * 64 + 32;
452+
const uint8_t *scales_3 = b_ptr[l].scales + (k / 4) * 64 + 48;
453453
for (int j = 0; j < ncols_interleaved; j++) {
454454
sumi1 = 0;
455455
sumi2 = 0;
@@ -471,13 +471,13 @@ void ggml_gemv_q2_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
471471
sumi2 = sumi2 * (scales_1[offset] & 0xF);
472472
sumi3 = sumi3 * (scales_2[offset] & 0xF);
473473
sumi4 = sumi4 * (scales_3[offset] & 0xF);
474-
sumi += sumi1 + sumi2 + sumi3 + sumi4;
474+
sumi += sumi1 + sumi2 + sumi3 + sumi4;
475475
}
476-
sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d;
476+
sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d;
477477
}
478478
}
479479
for(int sb = 0; sb < 8; sb++) {
480-
uint8_t *mins = (uint8_t*) b_ptr[l].scales + sb * 16;
480+
const uint8_t *mins = b_ptr[l].scales + sb * 16;
481481
for(int j = 0; j < ncols_interleaved; j++){
482482
sum_minf[j] += ((mins[j * 2] >> 4) * a_ptr[l].bsums[sb * 2] + (mins[(j * 2)+ 1] >> 4) * a_ptr[l].bsums[sb * 2 + 1]) * GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d;
483483
}
@@ -826,10 +826,10 @@ void ggml_gemm_q2_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
826826
for (int l = 0; l < nb; l++) {
827827
for (int k = 0; k < (qk / (4 * blocklen)); k++) {
828828

829-
uint8_t *scales_0 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 ;
830-
uint8_t *scales_1 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 16;
831-
uint8_t *scales_2 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 32;
832-
uint8_t *scales_3 = (uint8_t*) b_ptr[l].scales + (k / 4) * 64 + 48;
829+
const uint8_t *scales_0 = b_ptr[l].scales + (k / 4) * 64 ;
830+
const uint8_t *scales_1 = b_ptr[l].scales + (k / 4) * 64 + 16;
831+
const uint8_t *scales_2 = b_ptr[l].scales + (k / 4) * 64 + 32;
832+
const uint8_t *scales_3 = b_ptr[l].scales + (k / 4) * 64 + 48;
833833
for (int m = 0; m < 4; m++) {
834834
for (int j = 0; j < ncols_interleaved; j++) {
835835
sumi1 = 0;
@@ -858,7 +858,7 @@ void ggml_gemm_q2_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
858858
}
859859
}
860860
for(int sb = 0; sb < 8; sb++) {
861-
uint8_t *mins = (uint8_t*) b_ptr[l].scales + sb * 16;
861+
const uint8_t *mins = b_ptr[l].scales + sb * 16;
862862
for(int m = 0; m < 4; m++) {
863863
const int16_t *bsums = a_ptr[l].bsums + (sb * 8) + (m * 4) - ((sb % 2) * 6);
864864
for(int j = 0; j < ncols_interleaved; j++) {
@@ -1112,7 +1112,6 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
11121112
// The output Q2_Kx8 structure has 128 bytes for storing scales and mins
11131113
// Each group of 16 bytes is packed so that it contains the scales and mins for the corresponding sub-blocks of the Q2_K structures
11141114
// E.g., the first 16 bytes contain 16 scales and 16 mins: those of the first and second sub-blocks from the different Q2_K structures
1115-
uint8_t s[128], m[128];
11161115

11171116
for(int i = 0; i < 128; i++){
11181117

@@ -1121,9 +1120,6 @@ static block_q2_Kx8 make_block_q2_Kx8(block_q2_K * in, unsigned int blck_size_in
11211120
// Index for selecting scale
11221121
int src2 = ((i / 16) * 2) + (i % 2);
11231122

1124-
s[i] = in[src1].scales[src2] & 15;
1125-
m[i] = in[src1].scales[src2] & 240;
1126-
11271123
out.scales[i] = in[src1].scales[src2];
11281124
}
11291125
return out;

0 commit comments

Comments
 (0)