Skip to content

Commit 69d2064

Browse files
committed
Make the SkylakeX/Haswell SGEMM code compile and run even with compilers that lack AVX2 support
1 parent 3843e3e commit 69d2064

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

kernel/x86_64/sgemm_beta_skylakex.c

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -61,23 +61,21 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
6161
c_offset = c;
6262

6363
if (beta == ZERO){
64-
#ifdef __AVX512CD__
65-
__m512 z_zero = _mm512_setzero_ps();
66-
#endif
67-
__m256 y_zero = _mm256_setzero_ps();
6864

6965
j = n;
7066
do {
7167
c_offset1 = c_offset;
7268
c_offset += ldc;
7369

7470
i = m;
75-
71+
#ifdef __AVX2__
7672
while (i >= 32) {
7773
#ifdef __AVX512CD__
74+
__m512 z_zero = _mm512_setzero_ps();
7875
_mm512_storeu_ps(c_offset1, z_zero);
7976
_mm512_storeu_ps(c_offset1 + 16, z_zero);
8077
#else
78+
__m256 y_zero = _mm256_setzero_ps();
8179
_mm256_storeu_ps(c_offset1, y_zero);
8280
_mm256_storeu_ps(c_offset1 + 8, y_zero);
8381
_mm256_storeu_ps(c_offset1 + 16, y_zero);
@@ -87,11 +85,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
8785
i -= 32;
8886
}
8987
while (i >= 8) {
88+
__m256 y_zero = _mm256_setzero_ps();
9089
_mm256_storeu_ps(c_offset1, y_zero);
9190
c_offset1 += 8;
9291
i -= 8;
9392
}
94-
93+
#endif
9594
while (i > 0) {
9695
*c_offset1 = ZERO;
9796
c_offset1 ++;

0 commit comments

Comments
 (0)