Skip to content

Commit b8f3605

Browse files
authored
Merge pull request #23 from xianyi/develop
rebase
2 parents: fbb8949 + b36018b; commit: b8f3605

18 files changed

+566
-28
lines changed

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,3 +179,4 @@ In chronological order:
179179
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
180180
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
181181
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
182+
* [2020-01-07] optimize AVX2 SGEMM and STRMM

kernel/x86_64/KERNEL.HASWELL

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,11 @@ DAXPYKERNEL = daxpy.c
3131
CAXPYKERNEL = caxpy.c
3232
ZAXPYKERNEL = zaxpy.c
3333

34-
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
35-
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
34+
STRMMKERNEL = sgemm_kernel_8x4_haswell.c
35+
SGEMMKERNEL = sgemm_kernel_8x4_haswell.c
3636
SGEMM_BETA = sgemm_beta_skylakex.c
37-
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
38-
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
37+
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
38+
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
3939
SGEMMONCOPY = sgemm_ncopy_4_skylakex.c
4040
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
4141
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)

kernel/x86_64/KERNEL.SKYLAKEX

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
include $(KERNELDIR)/KERNEL.HASWELL
22

33
SGEMMKERNEL = sgemm_kernel_16x4_skylakex_2.c
4-
4+
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
55
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
66
SGEMMITCOPY = sgemm_tcopy_16_skylakex.c
77
SGEMMONCOPY = sgemm_ncopy_4_skylakex.c

kernel/x86_64/KERNEL.ZEN

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,10 @@ DAXPYKERNEL = daxpy.c
3030
CAXPYKERNEL = caxpy.c
3131
ZAXPYKERNEL = zaxpy.c
3232

33-
STRMMKERNEL = sgemm_kernel_16x4_haswell.S
34-
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
35-
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
36-
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
33+
STRMMKERNEL = sgemm_kernel_8x4_haswell.c
34+
SGEMMKERNEL = sgemm_kernel_8x4_haswell.c
35+
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
36+
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
3737
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
3838
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
3939
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)

kernel/x86_64/sgemm_kernel_8x4_haswell.c

Lines changed: 490 additions & 0 deletions
Large diffs are not rendered by default.

lapack-netlib/LAPACKE/src/lapacke_cheev_work.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,11 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
7878
info = info - 1;
7979
}
8080
/* Transpose output matrices */
81-
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
81+
if ( jobz == 'V') {
82+
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
83+
} else {
84+
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
85+
}
8286
/* Release memory and exit */
8387
LAPACKE_free( a_t );
8488
exit_level_0:

lapack-netlib/LAPACKE/src/lapacke_cheevd_2stage_work.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,11 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
7979
info = info - 1;
8080
}
8181
/* Transpose output matrices */
82-
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
82+
if ( jobz == 'V') {
83+
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
84+
} else {
85+
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
86+
}
8387
/* Release memory and exit */
8488
LAPACKE_free( a_t );
8589
exit_level_0:

lapack-netlib/LAPACKE/src/lapacke_cheevd_work.c

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,8 +79,11 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
7979
info = info - 1;
8080
}
8181
/* Transpose output matrices */
82-
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
83-
82+
if ( jobz == 'V') {
83+
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
84+
} else {
85+
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
86+
}
8487
/* Release memory and exit */
8588
LAPACKE_free( a_t );
8689
exit_level_0:

lapack-netlib/LAPACKE/src/lapacke_dsyev_work.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,11 @@ lapack_int LAPACKE_dsyev_work( int matrix_layout, char jobz, char uplo,
7272
info = info - 1;
7373
}
7474
/* Transpose output matrices */
75-
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
75+
if ( jobz == 'V') {
76+
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
77+
} else {
78+
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
79+
}
7680
/* Release memory and exit */
7781
LAPACKE_free( a_t );
7882
exit_level_0:

lapack-netlib/LAPACKE/src/lapacke_dsyevd_2stage_work.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,11 @@ lapack_int LAPACKE_dsyevd_2stage_work( int matrix_layout, char jobz, char uplo,
7676
info = info - 1;
7777
}
7878
/* Transpose output matrices */
79-
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
79+
if ( jobz == 'V') {
80+
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
81+
} else {
82+
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
83+
}
8084
/* Release memory and exit */
8185
LAPACKE_free( a_t );
8286
exit_level_0:

0 commit comments

Comments
 (0)