@@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
97
97
("mov %%ebx, %%edi;"
98
98
"cpuid;"
99
99
"xchgl %%ebx, %%edi;"
100
- : "=a" (* eax ), "=D" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) : "cc" );
100
+ : "=a" (* eax ), "=D" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ), "c" ( 0 ) : "cc" );
101
101
#else
102
102
__asm__ __volatile__
103
- ("cpuid" : "=a" (* eax ), "=b" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) : "cc" );
103
+ ("cpuid" : "=a" (* eax ), "=b" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) , "c" ( 0 ) : "cc" );
104
104
#endif
105
105
}
106
106
@@ -211,6 +211,42 @@ int support_avx(){
211
211
#endif
212
212
}
213
213
214
/*
 * Report whether AVX2 can be used on the running CPU.
 *
 * Returns 1 when support_avx() reports success and CPUID leaf 7 sets the
 * AVX2 feature flag (EBX bit 5); returns 0 otherwise. Always returns 0 when
 * the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax = 0, ebx = 0, ecx = 0, edx = 0;

  /* support_avx must actually be called: the bare function name is a
     non-null pointer, so "!support_avx" is always false and the guard
     would never fire. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* AVX2 is EBX bit 5 of CPUID leaf 7; bit 7 is SMEP, an unrelated
     supervisor-mode feature, so testing it gives wrong answers. */
  if ((ebx & (1 << 5)) != 0)
    return 1;   /* CPU advertises AVX2 */
  return 0;
#else
  return 0;
#endif
}
229
+
230
/*
 * Report whether AVX512VL can be used on the running CPU.
 *
 * Returns 1 only when support_avx() reports success and CPUID leaf 7 sets
 * both the AVX2 flag (EBX bit 5) and the AVX512VL flag (EBX bit 31).
 * Returns 0 otherwise, and always 0 when the build defines NO_AVX512.
 *
 * NOTE(review): this only inspects CPUID feature bits. Whether the OS has
 * enabled opmask/ZMM state (XCR0 bits 5-7) is not checked here -- confirm
 * whether support_avx() covers that or an extra XGETBV test is needed.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax = 0, ebx = 0, ecx = 0, edx = 0;
  unsigned int leaf7_ebx;

  /* support_avx must actually be called: the bare function name is a
     non-null pointer, so "!support_avx" is always false. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);
  /* Test the flags on an unsigned copy: 1 << 31 overflows a signed int. */
  leaf7_ebx = (unsigned int)ebx;

  /* Without AVX2 there is nothing to build on; return immediately so the
     AVX512VL test below cannot override this result. */
  if ((leaf7_ebx & (1u << 5)) == 0)
    return 0;

  if ((leaf7_ebx & (1u << 31)) != 0)
    return 1;   /* CPU advertises AVX512VL */
  return 0;
#else
  return 0;
#endif
}
249
+
214
250
215
251
int get_vendor (void ){
216
252
int eax , ebx , ecx , edx ;
@@ -294,6 +330,8 @@ int get_cputype(int gettype){
294
330
if ((ecx & (1 << 20 )) != 0 ) feature |= HAVE_SSE4_2 ;
295
331
#ifndef NO_AVX
296
332
if (support_avx ()) feature |= HAVE_AVX ;
333
+ if (support_avx2 ()) feature |= HAVE_AVX2 ;
334
+ if (support_avx512 ()) feature |= HAVE_AVX512VL ;
297
335
if ((ecx & (1 << 12 )) != 0 ) feature |= HAVE_FMA3 ;
298
336
#endif
299
337
@@ -1228,22 +1266,18 @@ int get_cpuname(void){
1228
1266
return CPUTYPE_NEHALEM ;
1229
1267
case 12 :
1230
1268
case 15 :
1231
- if (support_avx ())
1232
- #ifndef NO_AVX2
1269
+ if (support_avx2 ())
1233
1270
return CPUTYPE_HASWELL ;
1234
- #else
1271
+ if ( support_avx ())
1235
1272
return CPUTYPE_SANDYBRIDGE ;
1236
- #endif
1237
1273
else
1238
1274
return CPUTYPE_NEHALEM ;
1239
1275
case 13 :
1240
1276
//Broadwell
1241
- if (support_avx ())
1242
- #ifndef NO_AVX2
1277
+ if (support_avx2 ())
1243
1278
return CPUTYPE_HASWELL ;
1244
- #else
1279
+ if ( support_avx ())
1245
1280
return CPUTYPE_SANDYBRIDGE ;
1246
- #endif
1247
1281
else
1248
1282
return CPUTYPE_NEHALEM ;
1249
1283
}
@@ -1252,33 +1286,27 @@ int get_cpuname(void){
1252
1286
switch (model ) {
1253
1287
case 5 :
1254
1288
case 6 :
1255
- if (support_avx ())
1256
- #ifndef NO_AVX2
1289
+ if (support_avx2 ())
1257
1290
return CPUTYPE_HASWELL ;
1258
- #else
1291
+ if ( support_avx ())
1259
1292
return CPUTYPE_SANDYBRIDGE ;
1260
- #endif
1261
1293
else
1262
1294
return CPUTYPE_NEHALEM ;
1263
1295
case 7 :
1264
1296
case 15 :
1265
1297
//Broadwell
1266
- if (support_avx ())
1267
- #ifndef NO_AVX2
1298
+ if (support_avx2 ())
1268
1299
return CPUTYPE_HASWELL ;
1269
- #else
1300
+ if ( support_avx ())
1270
1301
return CPUTYPE_SANDYBRIDGE ;
1271
- #endif
1272
1302
else
1273
1303
return CPUTYPE_NEHALEM ;
1274
1304
case 14 :
1275
1305
//Skylake
1276
- if (support_avx ())
1277
- #ifndef NO_AVX2
1306
+ if (support_avx2 ())
1278
1307
return CPUTYPE_HASWELL ;
1279
- #else
1308
+ if ( support_avx ())
1280
1309
return CPUTYPE_SANDYBRIDGE ;
1281
- #endif
1282
1310
else
1283
1311
return CPUTYPE_NEHALEM ;
1284
1312
case 12 :
@@ -1292,46 +1320,36 @@ int get_cpuname(void){
1292
1320
switch (model ) {
1293
1321
case 6 :
1294
1322
//Broadwell
1295
- if (support_avx ())
1296
- #ifndef NO_AVX2
1323
+ if (support_avx2 ())
1297
1324
return CPUTYPE_HASWELL ;
1298
- #else
1325
+ if ( support_avx ())
1299
1326
return CPUTYPE_SANDYBRIDGE ;
1300
- #endif
1301
1327
else
1302
1328
return CPUTYPE_NEHALEM ;
1303
1329
case 5 :
1304
1330
// Skylake X
1305
- #ifndef NO_AVX512
1306
- return CPUTYPE_SKYLAKEX ;
1307
- #else
1308
- if (support_avx ())
1309
- #ifndef NO_AVX2
1310
- return CPUTYPE_HASWELL ;
1311
- #else
1312
- return CPUTYPE_SANDYBRIDGE ;
1313
- #endif
1331
+ if (support_avx512 ())
1332
+ return CPUTYPE_SKYLAKEX ;
1333
+ if (support_avx2 ())
1334
+ return CPUTYPE_HASWELL ;
1335
+ if (support_avx ())
1336
+ return CPUTYPE_SANDYBRIDGE ;
1314
1337
else
1315
1338
return CPUTYPE_NEHALEM ;
1316
- #endif
1317
1339
case 14 :
1318
1340
// Skylake
1319
- if (support_avx ())
1320
- #ifndef NO_AVX2
1341
+ if (support_avx2 ())
1321
1342
return CPUTYPE_HASWELL ;
1322
- #else
1343
+ if ( support_avx ())
1323
1344
return CPUTYPE_SANDYBRIDGE ;
1324
- #endif
1325
1345
else
1326
1346
return CPUTYPE_NEHALEM ;
1327
1347
case 7 :
1328
1348
// Xeon Phi Knights Landing
1329
- if (support_avx ())
1330
- #ifndef NO_AVX2
1349
+ if (support_avx2 ())
1331
1350
return CPUTYPE_HASWELL ;
1332
- #else
1351
+ if ( support_avx ())
1333
1352
return CPUTYPE_SANDYBRIDGE ;
1334
- #endif
1335
1353
else
1336
1354
return CPUTYPE_NEHALEM ;
1337
1355
case 12 :
@@ -1342,30 +1360,24 @@ int get_cpuname(void){
1342
1360
case 6 :
1343
1361
switch (model ) {
1344
1362
case 6 : // Cannon Lake
1345
- #ifndef NO_AVX512
1346
- return CPUTYPE_SKYLAKEX ;
1347
- #else
1348
- if (support_avx ())
1349
- #ifndef NO_AVX2
1350
- return CPUTYPE_HASWELL ;
1351
- #else
1352
- return CPUTYPE_SANDYBRIDGE ;
1353
- #endif
1363
+ if (support_avx512 ())
1364
+ return CPUTYPE_SKYLAKEX ;
1365
+ if (support_avx2 ())
1366
+ return CPUTYPE_HASWELL ;
1367
+ if (support_avx ())
1368
+ return CPUTYPE_SANDYBRIDGE ;
1354
1369
else
1355
1370
return CPUTYPE_NEHALEM ;
1356
- #endif
1357
1371
}
1358
1372
break ;
1359
1373
case 9 :
1360
1374
case 8 :
1361
1375
switch (model ) {
1362
1376
case 14 : // Kaby Lake
1363
- if (support_avx ())
1364
- #ifndef NO_AVX2
1377
+ if (support_avx2 ())
1365
1378
return CPUTYPE_HASWELL ;
1366
- #else
1379
+ if ( support_avx ())
1367
1380
return CPUTYPE_SANDYBRIDGE ;
1368
- #endif
1369
1381
else
1370
1382
return CPUTYPE_NEHALEM ;
1371
1383
}
@@ -2112,6 +2124,8 @@ void get_cpuconfig(void){
2112
2124
if (features & HAVE_SSE4A ) printf ("#define HAVE_SSE4A\n" );
2113
2125
if (features & HAVE_SSE5 ) printf ("#define HAVE_SSSE5\n" );
2114
2126
if (features & HAVE_AVX ) printf ("#define HAVE_AVX\n" );
2127
+ if (features & HAVE_AVX2 ) printf ("#define HAVE_AVX2\n" );
2128
+ if (features & HAVE_AVX512VL ) printf ("#define HAVE_AVX512VL\n" );
2115
2129
if (features & HAVE_3DNOWEX ) printf ("#define HAVE_3DNOWEX\n" );
2116
2130
if (features & HAVE_3DNOW ) printf ("#define HAVE_3DNOW\n" );
2117
2131
if (features & HAVE_FMA4 ) printf ("#define HAVE_FMA4\n" );
@@ -2180,6 +2194,8 @@ void get_sse(void){
2180
2194
if (features & HAVE_SSE4A ) printf ("HAVE_SSE4A=1\n" );
2181
2195
if (features & HAVE_SSE5 ) printf ("HAVE_SSSE5=1\n" );
2182
2196
if (features & HAVE_AVX ) printf ("HAVE_AVX=1\n" );
2197
+ if (features & HAVE_AVX2 ) printf ("HAVE_AVX2=1\n" );
2198
+ if (features & HAVE_AVX512VL ) printf ("HAVE_AVX512VL=1\n" );
2183
2199
if (features & HAVE_3DNOWEX ) printf ("HAVE_3DNOWEX=1\n" );
2184
2200
if (features & HAVE_3DNOW ) printf ("HAVE_3DNOW=1\n" );
2185
2201
if (features & HAVE_FMA4 ) printf ("HAVE_FMA4=1\n" );
0 commit comments