Skip to content

Commit 55b244c

Browse files
committed
enable the SGEMM/SKX C based kernel
In QA the final bug was found, so now the skylakex sgemm C based kernel can be activated....
1 parent 2263d39 commit 55b244c

File tree

3 files changed

+10
-554
lines changed

3 files changed

+10
-554
lines changed

kernel/x86_64/KERNEL.SKYLAKEX

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
include $(KERNELDIR)/KERNEL.HASWELL
22

3-
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
3+
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.c
4+
5+
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
6+
SGEMMITCOPY = sgemm_tcopy_16_skylakex.c
7+
SGEMMONCOPY = sgemm_ncopy_4_skylakex.c
8+
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
49

510
DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c
611

@@ -9,5 +14,5 @@ DGEMMITCOPY = dgemm_tcopy_8_skylakex.c
914
DGEMMONCOPY = dgemm_ncopy_8_skylakex.c
1015
DGEMMOTCOPY = dgemm_tcopy_8_skylakex.c
1116

12-
SGEMM_BETA = ../generic/gemm_beta.c
17+
SGEMM_BETA = sgemm_beta_skylakex.c
1318
DGEMM_BETA = dgemm_beta_skylakex.c

kernel/x86_64/sgemm_beta_skylakex.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
6060

6161
if (beta == ZERO){
6262
__m512 z_zero;
63+
__m256 y_zero;
6364

6465
z_zero = _mm512_setzero_ps();
66+
y_zero = _mm256_setzero_ps();
6567
j = n;
6668
do {
6769
c_offset1 = c_offset;
@@ -71,14 +73,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
7173

7274
while (i > 32) {
7375
_mm512_storeu_ps(c_offset1, z_zero);
74-
_mm512_storeu_ps(c_offset1 + 8, z_zero);
7576
_mm512_storeu_ps(c_offset1 + 16, z_zero);
76-
_mm512_storeu_ps(c_offset1 + 24 , z_zero);
7777
c_offset1 += 32;
7878
i -= 32;
7979
}
8080
while (i > 8) {
81-
_mm512_storeu_ps(c_offset1, z_zero);
81+
_mm256_storeu_ps(c_offset1, y_zero);
8282
c_offset1 += 8;
8383
i -= 8;
8484
}

0 commit comments

Comments
 (0)