Skip to content

Commit 8dfda02

Browse files
authored
Merge pull request #68 from xianyi/develop
rebase
2 parents 4ca8bec + 28d69e0 commit 8dfda02

13 files changed

+1227
-124
lines changed

cpuid_x86.c

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1406,6 +1406,16 @@ int get_cpuname(void){
14061406
return CPUTYPE_SANDYBRIDGE;
14071407
else
14081408
return CPUTYPE_NEHALEM;
1409+
}
1410+
case 10: //family 6 exmodel 10
1411+
switch (model) {
1412+
case 6: // Comet Lake U
1413+
if(support_avx2())
1414+
return CPUTYPE_HASWELL;
1415+
if(support_avx())
1416+
return CPUTYPE_SANDYBRIDGE;
1417+
else
1418+
return CPUTYPE_NEHALEM;
14091419
}
14101420
break;
14111421
}
@@ -1955,6 +1965,19 @@ int get_coretype(void){
19551965
return CORE_NEHALEM;
19561966
}
19571967
break;
1968+
case 10:
1969+
switch (model) {
1970+
case 6:
1971+
// Comet Lake U
1972+
if(support_avx())
1973+
#ifndef NO_AVX2
1974+
return CORE_HASWELL;
1975+
#else
1976+
return CORE_SANDYBRIDGE;
1977+
#endif
1978+
else
1979+
return CORE_NEHALEM;
1980+
}
19581981
case 5:
19591982
switch (model) {
19601983
case 6:

driver/others/dynamic.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -618,6 +618,18 @@ static gotoblas_t *get_coretype(void){
618618
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
619619
}
620620
}
621+
case 10:
622+
if (model == 6) {
623+
if(support_avx2())
624+
return &gotoblas_HASWELL;
625+
if(support_avx()) {
626+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
627+
return &gotoblas_SANDYBRIDGE;
628+
} else {
629+
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
630+
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
631+
}
632+
}
621633
return NULL;
622634
}
623635
case 0xf:

kernel/Makefile.L3

Lines changed: 47 additions & 47 deletions
Large diffs are not rendered by default.

kernel/generic/gemm_ncopy_16.c

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -39,24 +39,24 @@
3939
#include <stdio.h>
4040
#include "common.h"
4141

42-
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
42+
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
4343
BLASLONG i, j;
4444

45-
FLOAT *aoffset;
46-
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
47-
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
48-
FLOAT *aoffset9, *aoffset10, *aoffset11, *aoffset12;
49-
FLOAT *aoffset13, *aoffset14, *aoffset15, *aoffset16;
50-
51-
FLOAT *boffset;
52-
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
53-
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
54-
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
55-
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
56-
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
57-
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
58-
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
59-
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
45+
IFLOAT *aoffset;
46+
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
47+
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
48+
IFLOAT *aoffset9, *aoffset10, *aoffset11, *aoffset12;
49+
IFLOAT *aoffset13, *aoffset14, *aoffset15, *aoffset16;
50+
51+
IFLOAT *boffset;
52+
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
53+
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
54+
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
55+
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
56+
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
57+
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
58+
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
59+
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
6060

6161
aoffset = a;
6262
boffset = b;

kernel/generic/gemm_ncopy_8.c

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -39,30 +39,30 @@
3939
#include <stdio.h>
4040
#include "common.h"
4141

42-
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
42+
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
4343
BLASLONG i, j;
4444

45-
FLOAT *aoffset;
46-
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
47-
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
48-
49-
FLOAT *boffset;
50-
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
51-
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
52-
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
53-
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
54-
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
55-
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
56-
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
57-
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
58-
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
59-
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
60-
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
61-
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
62-
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
63-
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
64-
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
65-
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
45+
IFLOAT *aoffset;
46+
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
47+
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
48+
49+
IFLOAT *boffset;
50+
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
51+
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
52+
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
53+
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
54+
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
55+
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
56+
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
57+
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
58+
IFLOAT ctemp33, ctemp34, ctemp35, ctemp36;
59+
IFLOAT ctemp37, ctemp38, ctemp39, ctemp40;
60+
IFLOAT ctemp41, ctemp42, ctemp43, ctemp44;
61+
IFLOAT ctemp45, ctemp46, ctemp47, ctemp48;
62+
IFLOAT ctemp49, ctemp50, ctemp51, ctemp52;
63+
IFLOAT ctemp53, ctemp54, ctemp55, ctemp56;
64+
IFLOAT ctemp57, ctemp58, ctemp59, ctemp60;
65+
IFLOAT ctemp61, ctemp62, ctemp63, ctemp64;
6666

6767

6868
aoffset = a;

kernel/generic/gemm_tcopy_16.c

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -39,22 +39,22 @@
3939
#include <stdio.h>
4040
#include "common.h"
4141

42-
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
42+
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
4343

4444
BLASLONG i, j;
4545

46-
FLOAT *aoffset;
47-
FLOAT *aoffset1, *aoffset2;
48-
FLOAT *boffset;
49-
50-
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
51-
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
52-
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
53-
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
54-
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
55-
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
56-
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
57-
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
46+
IFLOAT *aoffset;
47+
IFLOAT *aoffset1, *aoffset2;
48+
IFLOAT *boffset;
49+
50+
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
51+
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
52+
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
53+
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
54+
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
55+
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
56+
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
57+
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
5858

5959
aoffset = a;
6060
boffset = b;

kernel/generic/gemm_tcopy_8.c

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -39,32 +39,32 @@
3939
#include <stdio.h>
4040
#include "common.h"
4141

42-
int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
42+
int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
4343

4444
BLASLONG i, j;
4545

46-
FLOAT *aoffset;
47-
FLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
48-
FLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
49-
50-
FLOAT *boffset, *boffset1, *boffset2, *boffset3, *boffset4;
51-
52-
FLOAT ctemp01, ctemp02, ctemp03, ctemp04;
53-
FLOAT ctemp05, ctemp06, ctemp07, ctemp08;
54-
FLOAT ctemp09, ctemp10, ctemp11, ctemp12;
55-
FLOAT ctemp13, ctemp14, ctemp15, ctemp16;
56-
FLOAT ctemp17, ctemp18, ctemp19, ctemp20;
57-
FLOAT ctemp21, ctemp22, ctemp23, ctemp24;
58-
FLOAT ctemp25, ctemp26, ctemp27, ctemp28;
59-
FLOAT ctemp29, ctemp30, ctemp31, ctemp32;
60-
FLOAT ctemp33, ctemp34, ctemp35, ctemp36;
61-
FLOAT ctemp37, ctemp38, ctemp39, ctemp40;
62-
FLOAT ctemp41, ctemp42, ctemp43, ctemp44;
63-
FLOAT ctemp45, ctemp46, ctemp47, ctemp48;
64-
FLOAT ctemp49, ctemp50, ctemp51, ctemp52;
65-
FLOAT ctemp53, ctemp54, ctemp55, ctemp56;
66-
FLOAT ctemp57, ctemp58, ctemp59, ctemp60;
67-
FLOAT ctemp61, ctemp62, ctemp63, ctemp64;
46+
IFLOAT *aoffset;
47+
IFLOAT *aoffset1, *aoffset2, *aoffset3, *aoffset4;
48+
IFLOAT *aoffset5, *aoffset6, *aoffset7, *aoffset8;
49+
50+
IFLOAT *boffset, *boffset1, *boffset2, *boffset3, *boffset4;
51+
52+
IFLOAT ctemp01, ctemp02, ctemp03, ctemp04;
53+
IFLOAT ctemp05, ctemp06, ctemp07, ctemp08;
54+
IFLOAT ctemp09, ctemp10, ctemp11, ctemp12;
55+
IFLOAT ctemp13, ctemp14, ctemp15, ctemp16;
56+
IFLOAT ctemp17, ctemp18, ctemp19, ctemp20;
57+
IFLOAT ctemp21, ctemp22, ctemp23, ctemp24;
58+
IFLOAT ctemp25, ctemp26, ctemp27, ctemp28;
59+
IFLOAT ctemp29, ctemp30, ctemp31, ctemp32;
60+
IFLOAT ctemp33, ctemp34, ctemp35, ctemp36;
61+
IFLOAT ctemp37, ctemp38, ctemp39, ctemp40;
62+
IFLOAT ctemp41, ctemp42, ctemp43, ctemp44;
63+
IFLOAT ctemp45, ctemp46, ctemp47, ctemp48;
64+
IFLOAT ctemp49, ctemp50, ctemp51, ctemp52;
65+
IFLOAT ctemp53, ctemp54, ctemp55, ctemp56;
66+
IFLOAT ctemp57, ctemp58, ctemp59, ctemp60;
67+
IFLOAT ctemp61, ctemp62, ctemp63, ctemp64;
6868

6969
aoffset = a;
7070
boffset = b;

kernel/power/KERNEL.POWER10

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,17 @@ else
77
#CGEMM_BETA = ../generic/zgemm_beta.c
88
#ZGEMM_BETA = ../generic/zgemm_beta.c
99

10+
SHGEMM_BETA = ../generic/gemm_beta.c
11+
SHGEMMKERNEL = shgemm_kernel_power10.c
12+
SHGEMMINCOPY = ../generic/gemm_ncopy_16.c
13+
SHGEMMITCOPY = ../generic/gemm_tcopy_16.c
14+
SHGEMMONCOPY = ../generic/gemm_ncopy_8.c
15+
SHGEMMOTCOPY = ../generic/gemm_tcopy_8.c
16+
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
17+
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
18+
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
19+
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
20+
1021
STRMMKERNEL = sgemm_kernel_power10.c
1122
DTRMMKERNEL = dgemm_kernel_power10.c
1223
CTRMMKERNEL = cgemm_kernel_power10.S

0 commit comments

Comments
 (0)