Skip to content

Commit 8d99dba

Browse files
authored
Merge pull request #1949 from martin-frbg/issue1947
Query AVX2 and AVX512VL support when selecting x86 kernels
2 parents cf5d48e + 1650311 commit 8d99dba

File tree

4 files changed

+186
-98
lines changed

4 files changed

+186
-98
lines changed

common_x86_64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
134134
"=b" (*ebx),
135135
"=c" (*ecx),
136136
"=d" (*edx)
137-
: "0" (op));
137+
: "0" (op), "c"(0));
138138
#endif
139139
}
140140

cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
#define HAVE_FMA4 (1 << 19)
140140
#define HAVE_FMA3 (1 << 20)
141141
#define HAVE_AVX512VL (1 << 21)
142+
#define HAVE_AVX2 (1 << 22)
142143

143144
#define CACHE_INFO_L1_I 1
144145
#define CACHE_INFO_L1_D 2

cpuid_x86.c

Lines changed: 76 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
9797
("mov %%ebx, %%edi;"
9898
"cpuid;"
9999
"xchgl %%ebx, %%edi;"
100-
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
100+
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
101101
#else
102102
__asm__ __volatile__
103-
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
103+
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
104104
#endif
105105
}
106106

@@ -211,6 +211,44 @@ int support_avx(){
211211
#endif
212212
}
213213

214+
/*
 * Report whether AVX2 code paths may be selected.
 *
 * Returns 1 when support_avx() reports AVX as available and CPUID leaf 7
 * (subleaf 0) sets the AVX2 feature flag; returns 0 otherwise.  Always
 * returns 0 when the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  /* Must be a call: the bare name `support_avx` is a non-null function
     address, so `!support_avx` is always false and the guard never fires. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=7,ECX=0):EBX bit 5 is AVX2.  Bit 7 is SMEP, which is
     unrelated to AVX2 and is already set on some pre-AVX2 processors. */
  return (ebx & (1<<5)) != 0;
#else
  return 0;
#endif
}
229+
230+
/*
 * Report whether AVX512VL code paths may be selected.
 *
 * Returns 1 only when all of the following hold:
 *   - support_avx() reports AVX as available,
 *   - CPUID leaf 7 (subleaf 0) sets the AVX2 flag (EBX bit 5),
 *   - CPUID leaf 7 (subleaf 0) sets the AVX512VL flag (EBX bit 31),
 *   - bits 5..7 (mask 0xe0) of the value returned by xgetbv(0, ...) are
 *     all set.
 * Returns 0 otherwise, and always 0 when the build defines NO_AVX512.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax, ebx, ecx=0, edx;

  /* Must be a call: the bare name `support_avx` is a non-null function
     address, so `!support_avx` is always false and the guard never fires. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* No AVX2 means no AVX-512 either; reject outright instead of merely
     resetting a result variable that is already zero. */
  if ((ebx & (1<<5)) == 0)
    return 0;

  /* Unsigned constant: 1<<31 overflows a signed int (undefined behavior). */
  if ((ebx & (1u<<31)) == 0)
    return 0;  /* CPU does not report AVX512VL */

  /* NOTE(review): xgetbv(0, ...) is expected to read XCR0, where bits 5..7
     are the opmask, ZMM_Hi256 and Hi16_ZMM state components that the OS
     must enable before AVX-512 registers may be used -- confirm against
     the xgetbv helper defined elsewhere in this file. */
  xgetbv(0, &eax, &edx);
  return (eax & 0xe0) == 0xe0;
#else
  return 0;
#endif
}
251+
214252

215253
int get_vendor(void){
216254
int eax, ebx, ecx, edx;
@@ -294,6 +332,8 @@ int get_cputype(int gettype){
294332
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
295333
#ifndef NO_AVX
296334
if (support_avx()) feature |= HAVE_AVX;
335+
if (support_avx2()) feature |= HAVE_AVX2;
336+
if (support_avx512()) feature |= HAVE_AVX512VL;
297337
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
298338
#endif
299339

@@ -1228,22 +1268,18 @@ int get_cpuname(void){
12281268
return CPUTYPE_NEHALEM;
12291269
case 12:
12301270
case 15:
1231-
if(support_avx())
1232-
#ifndef NO_AVX2
1271+
if(support_avx2())
12331272
return CPUTYPE_HASWELL;
1234-
#else
1273+
if(support_avx())
12351274
return CPUTYPE_SANDYBRIDGE;
1236-
#endif
12371275
else
12381276
return CPUTYPE_NEHALEM;
12391277
case 13:
12401278
//Broadwell
1241-
if(support_avx())
1242-
#ifndef NO_AVX2
1279+
if(support_avx2())
12431280
return CPUTYPE_HASWELL;
1244-
#else
1281+
if(support_avx())
12451282
return CPUTYPE_SANDYBRIDGE;
1246-
#endif
12471283
else
12481284
return CPUTYPE_NEHALEM;
12491285
}
@@ -1252,33 +1288,27 @@ int get_cpuname(void){
12521288
switch (model) {
12531289
case 5:
12541290
case 6:
1255-
if(support_avx())
1256-
#ifndef NO_AVX2
1291+
if(support_avx2())
12571292
return CPUTYPE_HASWELL;
1258-
#else
1293+
if(support_avx())
12591294
return CPUTYPE_SANDYBRIDGE;
1260-
#endif
12611295
else
12621296
return CPUTYPE_NEHALEM;
12631297
case 7:
12641298
case 15:
12651299
//Broadwell
1266-
if(support_avx())
1267-
#ifndef NO_AVX2
1300+
if(support_avx2())
12681301
return CPUTYPE_HASWELL;
1269-
#else
1302+
if(support_avx())
12701303
return CPUTYPE_SANDYBRIDGE;
1271-
#endif
12721304
else
12731305
return CPUTYPE_NEHALEM;
12741306
case 14:
12751307
//Skylake
1276-
if(support_avx())
1277-
#ifndef NO_AVX2
1308+
if(support_avx2())
12781309
return CPUTYPE_HASWELL;
1279-
#else
1310+
if(support_avx())
12801311
return CPUTYPE_SANDYBRIDGE;
1281-
#endif
12821312
else
12831313
return CPUTYPE_NEHALEM;
12841314
case 12:
@@ -1292,46 +1322,36 @@ int get_cpuname(void){
12921322
switch (model) {
12931323
case 6:
12941324
//Broadwell
1295-
if(support_avx())
1296-
#ifndef NO_AVX2
1325+
if(support_avx2())
12971326
return CPUTYPE_HASWELL;
1298-
#else
1327+
if(support_avx())
12991328
return CPUTYPE_SANDYBRIDGE;
1300-
#endif
13011329
else
13021330
return CPUTYPE_NEHALEM;
13031331
case 5:
13041332
// Skylake X
1305-
#ifndef NO_AVX512
1306-
return CPUTYPE_SKYLAKEX;
1307-
#else
1308-
if(support_avx())
1309-
#ifndef NO_AVX2
1310-
return CPUTYPE_HASWELL;
1311-
#else
1312-
return CPUTYPE_SANDYBRIDGE;
1313-
#endif
1333+
if(support_avx512())
1334+
return CPUTYPE_SKYLAKEX;
1335+
if(support_avx2())
1336+
return CPUTYPE_HASWELL;
1337+
if(support_avx())
1338+
return CPUTYPE_SANDYBRIDGE;
13141339
else
13151340
return CPUTYPE_NEHALEM;
1316-
#endif
13171341
case 14:
13181342
// Skylake
1319-
if(support_avx())
1320-
#ifndef NO_AVX2
1343+
if(support_avx2())
13211344
return CPUTYPE_HASWELL;
1322-
#else
1345+
if(support_avx())
13231346
return CPUTYPE_SANDYBRIDGE;
1324-
#endif
13251347
else
13261348
return CPUTYPE_NEHALEM;
13271349
case 7:
13281350
// Xeon Phi Knights Landing
1329-
if(support_avx())
1330-
#ifndef NO_AVX2
1351+
if(support_avx2())
13311352
return CPUTYPE_HASWELL;
1332-
#else
1353+
if(support_avx())
13331354
return CPUTYPE_SANDYBRIDGE;
1334-
#endif
13351355
else
13361356
return CPUTYPE_NEHALEM;
13371357
case 12:
@@ -1342,30 +1362,24 @@ int get_cpuname(void){
13421362
case 6:
13431363
switch (model) {
13441364
case 6: // Cannon Lake
1345-
#ifndef NO_AVX512
1346-
return CPUTYPE_SKYLAKEX;
1347-
#else
1348-
if(support_avx())
1349-
#ifndef NO_AVX2
1350-
return CPUTYPE_HASWELL;
1351-
#else
1352-
return CPUTYPE_SANDYBRIDGE;
1353-
#endif
1365+
if(support_avx512())
1366+
return CPUTYPE_SKYLAKEX;
1367+
if(support_avx2())
1368+
return CPUTYPE_HASWELL;
1369+
if(support_avx())
1370+
return CPUTYPE_SANDYBRIDGE;
13541371
else
13551372
return CPUTYPE_NEHALEM;
1356-
#endif
13571373
}
13581374
break;
13591375
case 9:
13601376
case 8:
13611377
switch (model) {
13621378
case 14: // Kaby Lake
1363-
if(support_avx())
1364-
#ifndef NO_AVX2
1379+
if(support_avx2())
13651380
return CPUTYPE_HASWELL;
1366-
#else
1381+
if(support_avx())
13671382
return CPUTYPE_SANDYBRIDGE;
1368-
#endif
13691383
else
13701384
return CPUTYPE_NEHALEM;
13711385
}
@@ -2112,6 +2126,8 @@ void get_cpuconfig(void){
21122126
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
21132127
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
21142128
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
2129+
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
2130+
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
21152131
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
21162132
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
21172133
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
@@ -2180,6 +2196,8 @@ void get_sse(void){
21802196
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
21812197
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
21822198
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
2199+
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
2200+
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
21832201
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
21842202
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
21852203
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");

0 commit comments

Comments
 (0)