Skip to content

Commit 0afaae4

Browse files
authored
Query AVX2 and AVX512VL capability in x86 cpu detection
1 parent ae1d1f7 commit 0afaae4

File tree

3 files changed

+76
-59
lines changed

3 files changed

+76
-59
lines changed

common_x86_64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
134134
"=b" (*ebx),
135135
"=c" (*ecx),
136136
"=d" (*edx)
137-
: "0" (op));
137+
: "0" (op), "c"(0));
138138
#endif
139139
}
140140

cpuid.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
#define HAVE_FMA4 (1 << 19)
140140
#define HAVE_FMA3 (1 << 20)
141141
#define HAVE_AVX512VL (1 << 21)
142+
#define HAVE_AVX2 (1 << 22)
142143

143144
#define CACHE_INFO_L1_I 1
144145
#define CACHE_INFO_L1_D 2

cpuid_x86.c

Lines changed: 74 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
9797
("mov %%ebx, %%edi;"
9898
"cpuid;"
9999
"xchgl %%ebx, %%edi;"
100-
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
100+
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
101101
#else
102102
__asm__ __volatile__
103-
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
103+
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
104104
#endif
105105
}
106106

@@ -211,6 +211,42 @@ int support_avx(){
211211
#endif
212212
}
213213

214+
/*
 * Report whether AVX2 is available.
 *
 * Returns 1 when support_avx() returns nonzero and CPUID leaf 7 sets
 * EBX bit 5 (the AVX2 feature flag); returns 0 otherwise. Always returns 0
 * when the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx=0, edx;

  /* The helper must be *called*: a bare `support_avx` is the function's
     address, which is never null, so `!support_avx` could never be true. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=7):EBX bit 5 is AVX2; bit 7 is SMEP and says nothing
     about vector-instruction support. */
  if ((ebx & (1<<5)) != 0)
    return 1;
  return 0;
#else
  return 0;
#endif
}
229+
230+
/*
 * Report whether AVX512VL is available.
 *
 * Returns 1 when support_avx() returns nonzero and CPUID leaf 7 sets both
 * EBX bit 5 (AVX2) and EBX bit 31 (AVX512VL); returns 0 otherwise. Always
 * returns 0 when the build defines NO_AVX512.
 *
 * NOTE(review): this function only inspects CPUID feature bits. Whether the
 * OS has enabled the opmask/ZMM register state (XCR0 bits 5-7) is not checked
 * here -- confirm whether support_avx() or a caller covers that.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax, ebx, ecx=0, edx;

  /* The helper must be *called*: a bare `support_avx` is the function's
     address, which is never null, so `!support_avx` could never be true. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* No AVX2 (EBX bit 5): bail out instead of falling through to the
     AVX512VL test, which would otherwise override this verdict. */
  if ((ebx & (1<<5)) == 0)
    return 0;

  /* Test bit 31 with unsigned arithmetic; `1<<31` overflows a signed int. */
  if (((unsigned int)ebx & (1u<<31)) != 0)
    return 1;  /* CPU reports AVX512VL */
  return 0;
#else
  return 0;
#endif
}
249+
214250

215251
int get_vendor(void){
216252
int eax, ebx, ecx, edx;
@@ -294,6 +330,8 @@ int get_cputype(int gettype){
294330
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
295331
#ifndef NO_AVX
296332
if (support_avx()) feature |= HAVE_AVX;
333+
if (support_avx2()) feature |= HAVE_AVX2;
334+
if (support_avx512()) feature |= HAVE_AVX512VL;
297335
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
298336
#endif
299337

@@ -1228,22 +1266,18 @@ int get_cpuname(void){
12281266
return CPUTYPE_NEHALEM;
12291267
case 12:
12301268
case 15:
1231-
if(support_avx())
1232-
#ifndef NO_AVX2
1269+
if(support_avx2())
12331270
return CPUTYPE_HASWELL;
1234-
#else
1271+
if(support_avx())
12351272
return CPUTYPE_SANDYBRIDGE;
1236-
#endif
12371273
else
12381274
return CPUTYPE_NEHALEM;
12391275
case 13:
12401276
//Broadwell
1241-
if(support_avx())
1242-
#ifndef NO_AVX2
1277+
if(support_avx2())
12431278
return CPUTYPE_HASWELL;
1244-
#else
1279+
if(support_avx())
12451280
return CPUTYPE_SANDYBRIDGE;
1246-
#endif
12471281
else
12481282
return CPUTYPE_NEHALEM;
12491283
}
@@ -1252,33 +1286,27 @@ int get_cpuname(void){
12521286
switch (model) {
12531287
case 5:
12541288
case 6:
1255-
if(support_avx())
1256-
#ifndef NO_AVX2
1289+
if(support_avx2())
12571290
return CPUTYPE_HASWELL;
1258-
#else
1291+
if(support_avx())
12591292
return CPUTYPE_SANDYBRIDGE;
1260-
#endif
12611293
else
12621294
return CPUTYPE_NEHALEM;
12631295
case 7:
12641296
case 15:
12651297
//Broadwell
1266-
if(support_avx())
1267-
#ifndef NO_AVX2
1298+
if(support_avx2())
12681299
return CPUTYPE_HASWELL;
1269-
#else
1300+
if(support_avx())
12701301
return CPUTYPE_SANDYBRIDGE;
1271-
#endif
12721302
else
12731303
return CPUTYPE_NEHALEM;
12741304
case 14:
12751305
//Skylake
1276-
if(support_avx())
1277-
#ifndef NO_AVX2
1306+
if(support_avx2())
12781307
return CPUTYPE_HASWELL;
1279-
#else
1308+
if(support_avx())
12801309
return CPUTYPE_SANDYBRIDGE;
1281-
#endif
12821310
else
12831311
return CPUTYPE_NEHALEM;
12841312
case 12:
@@ -1292,46 +1320,36 @@ int get_cpuname(void){
12921320
switch (model) {
12931321
case 6:
12941322
//Broadwell
1295-
if(support_avx())
1296-
#ifndef NO_AVX2
1323+
if(support_avx2())
12971324
return CPUTYPE_HASWELL;
1298-
#else
1325+
if(support_avx())
12991326
return CPUTYPE_SANDYBRIDGE;
1300-
#endif
13011327
else
13021328
return CPUTYPE_NEHALEM;
13031329
case 5:
13041330
// Skylake X
1305-
#ifndef NO_AVX512
1306-
return CPUTYPE_SKYLAKEX;
1307-
#else
1308-
if(support_avx())
1309-
#ifndef NO_AVX2
1310-
return CPUTYPE_HASWELL;
1311-
#else
1312-
return CPUTYPE_SANDYBRIDGE;
1313-
#endif
1331+
if(support_avx512())
1332+
return CPUTYPE_SKYLAKEX;
1333+
if(support_avx2())
1334+
return CPUTYPE_HASWELL;
1335+
if(support_avx())
1336+
return CPUTYPE_SANDYBRIDGE;
13141337
else
13151338
return CPUTYPE_NEHALEM;
1316-
#endif
13171339
case 14:
13181340
// Skylake
1319-
if(support_avx())
1320-
#ifndef NO_AVX2
1341+
if(support_avx2())
13211342
return CPUTYPE_HASWELL;
1322-
#else
1343+
if(support_avx())
13231344
return CPUTYPE_SANDYBRIDGE;
1324-
#endif
13251345
else
13261346
return CPUTYPE_NEHALEM;
13271347
case 7:
13281348
// Xeon Phi Knights Landing
1329-
if(support_avx())
1330-
#ifndef NO_AVX2
1349+
if(support_avx2())
13311350
return CPUTYPE_HASWELL;
1332-
#else
1351+
if(support_avx())
13331352
return CPUTYPE_SANDYBRIDGE;
1334-
#endif
13351353
else
13361354
return CPUTYPE_NEHALEM;
13371355
case 12:
@@ -1342,30 +1360,24 @@ int get_cpuname(void){
13421360
case 6:
13431361
switch (model) {
13441362
case 6: // Cannon Lake
1345-
#ifndef NO_AVX512
1346-
return CPUTYPE_SKYLAKEX;
1347-
#else
1348-
if(support_avx())
1349-
#ifndef NO_AVX2
1350-
return CPUTYPE_HASWELL;
1351-
#else
1352-
return CPUTYPE_SANDYBRIDGE;
1353-
#endif
1363+
if(support_avx512())
1364+
return CPUTYPE_SKYLAKEX;
1365+
if(support_avx2())
1366+
return CPUTYPE_HASWELL;
1367+
if(support_avx())
1368+
return CPUTYPE_SANDYBRIDGE;
13541369
else
13551370
return CPUTYPE_NEHALEM;
1356-
#endif
13571371
}
13581372
break;
13591373
case 9:
13601374
case 8:
13611375
switch (model) {
13621376
case 14: // Kaby Lake
1363-
if(support_avx())
1364-
#ifndef NO_AVX2
1377+
if(support_avx2())
13651378
return CPUTYPE_HASWELL;
1366-
#else
1379+
if(support_avx())
13671380
return CPUTYPE_SANDYBRIDGE;
1368-
#endif
13691381
else
13701382
return CPUTYPE_NEHALEM;
13711383
}
@@ -2112,6 +2124,8 @@ void get_cpuconfig(void){
21122124
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
21132125
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
21142126
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
2127+
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
2128+
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
21152129
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
21162130
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
21172131
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
@@ -2180,6 +2194,8 @@ void get_sse(void){
21802194
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
21812195
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
21822196
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
2197+
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
2198+
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
21832199
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
21842200
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
21852201
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");

0 commit comments

Comments
 (0)