@@ -97,10 +97,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
97
97
("mov %%ebx, %%edi;"
98
98
"cpuid;"
99
99
"xchgl %%ebx, %%edi;"
100
- : "=a" (* eax ), "=D" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) : "cc" );
100
+ : "=a" (* eax ), "=D" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ), "c" ( 0 ) : "cc" );
101
101
#else
102
102
__asm__ __volatile__
103
- ("cpuid" : "=a" (* eax ), "=b" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) : "cc" );
103
+ ("cpuid" : "=a" (* eax ), "=b" (* ebx ), "=c" (* ecx ), "=d" (* edx ) : "a" (op ) , "c" ( 0 ) : "cc" );
104
104
#endif
105
105
}
106
106
@@ -211,6 +211,44 @@ int support_avx(){
211
211
#endif
212
212
}
213
213
214
/*
 * Report whether AVX2 may be used on the running machine.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 advertises AVX2,
 * otherwise 0.  Always returns 0 when the build defines NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  int eax, ebx, ecx, edx;

  // support_avx is a function: it has to be called.  Testing the bare name
  // compares the function's address with zero, which is never true.
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  // CPUID.(EAX=7,ECX=0):EBX bit 5 is the AVX2 feature flag (bit 7 is SMEP,
  // which is also present on CPUs that lack AVX2).
  if ((ebx & (1 << 5)) != 0)
    return 1;
  return 0;
#else
  return 0;
#endif
}
229
+
230
/*
 * Report whether AVX512VL may be used on the running machine.
 *
 * Returns 1 only when all of the following hold, otherwise 0:
 *   - support_avx() succeeds,
 *   - CPUID leaf 7 EBX has bit 5 (AVX2) set,
 *   - CPUID leaf 7 EBX has bit 31 (AVX512VL) set,
 *   - the value returned by xgetbv(0, ...) has bits 5..7 (mask 0xe0) set.
 * Always returns 0 when the build defines NO_AVX512.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax, ebx, ecx, edx;

  // support_avx is a function: call it.  The bare name is a non-null
  // function address, so "!support_avx" could never trigger this exit.
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  // Without AVX2 there is nothing more to check.  Return here; merely
  // assigning 0 to a result variable would let the AVX512 test below
  // overrule it.
  if ((ebx & (1 << 5)) == 0)
    return 0;

  // Bit 31 is tested in unsigned arithmetic: (1 << 31) overflows a signed
  // 32-bit int, which is undefined behaviour.
  if (((unsigned int)ebx & (1u << 31)) == 0)
    return 0;

  // NOTE(review): xgetbv(0, ...) presumably reads XCR0; bits 5..7 would then
  // be the opmask / ZMM_Hi256 / Hi16_ZMM state components enabled by the OS.
  xgetbv(0, &eax, &edx);
  if ((eax & 0xe0) == 0xe0)
    return 1;  // OS supports AVX512VL
  return 0;
#else
  return 0;
#endif
}
251
+
214
252
215
253
int get_vendor (void ){
216
254
int eax , ebx , ecx , edx ;
@@ -294,6 +332,8 @@ int get_cputype(int gettype){
294
332
if ((ecx & (1 << 20 )) != 0 ) feature |= HAVE_SSE4_2 ;
295
333
#ifndef NO_AVX
296
334
if (support_avx ()) feature |= HAVE_AVX ;
335
+ if (support_avx2 ()) feature |= HAVE_AVX2 ;
336
+ if (support_avx512 ()) feature |= HAVE_AVX512VL ;
297
337
if ((ecx & (1 << 12 )) != 0 ) feature |= HAVE_FMA3 ;
298
338
#endif
299
339
@@ -1228,22 +1268,18 @@ int get_cpuname(void){
1228
1268
return CPUTYPE_NEHALEM ;
1229
1269
case 12 :
1230
1270
case 15 :
1231
- if (support_avx ())
1232
- #ifndef NO_AVX2
1271
+ if (support_avx2 ())
1233
1272
return CPUTYPE_HASWELL ;
1234
- #else
1273
+ if ( support_avx ())
1235
1274
return CPUTYPE_SANDYBRIDGE ;
1236
- #endif
1237
1275
else
1238
1276
return CPUTYPE_NEHALEM ;
1239
1277
case 13 :
1240
1278
//Broadwell
1241
- if (support_avx ())
1242
- #ifndef NO_AVX2
1279
+ if (support_avx2 ())
1243
1280
return CPUTYPE_HASWELL ;
1244
- #else
1281
+ if ( support_avx ())
1245
1282
return CPUTYPE_SANDYBRIDGE ;
1246
- #endif
1247
1283
else
1248
1284
return CPUTYPE_NEHALEM ;
1249
1285
}
@@ -1252,33 +1288,27 @@ int get_cpuname(void){
1252
1288
switch (model ) {
1253
1289
case 5 :
1254
1290
case 6 :
1255
- if (support_avx ())
1256
- #ifndef NO_AVX2
1291
+ if (support_avx2 ())
1257
1292
return CPUTYPE_HASWELL ;
1258
- #else
1293
+ if ( support_avx ())
1259
1294
return CPUTYPE_SANDYBRIDGE ;
1260
- #endif
1261
1295
else
1262
1296
return CPUTYPE_NEHALEM ;
1263
1297
case 7 :
1264
1298
case 15 :
1265
1299
//Broadwell
1266
- if (support_avx ())
1267
- #ifndef NO_AVX2
1300
+ if (support_avx2 ())
1268
1301
return CPUTYPE_HASWELL ;
1269
- #else
1302
+ if ( support_avx ())
1270
1303
return CPUTYPE_SANDYBRIDGE ;
1271
- #endif
1272
1304
else
1273
1305
return CPUTYPE_NEHALEM ;
1274
1306
case 14 :
1275
1307
//Skylake
1276
- if (support_avx ())
1277
- #ifndef NO_AVX2
1308
+ if (support_avx2 ())
1278
1309
return CPUTYPE_HASWELL ;
1279
- #else
1310
+ if ( support_avx ())
1280
1311
return CPUTYPE_SANDYBRIDGE ;
1281
- #endif
1282
1312
else
1283
1313
return CPUTYPE_NEHALEM ;
1284
1314
case 12 :
@@ -1292,46 +1322,36 @@ int get_cpuname(void){
1292
1322
switch (model ) {
1293
1323
case 6 :
1294
1324
//Broadwell
1295
- if (support_avx ())
1296
- #ifndef NO_AVX2
1325
+ if (support_avx2 ())
1297
1326
return CPUTYPE_HASWELL ;
1298
- #else
1327
+ if ( support_avx ())
1299
1328
return CPUTYPE_SANDYBRIDGE ;
1300
- #endif
1301
1329
else
1302
1330
return CPUTYPE_NEHALEM ;
1303
1331
case 5 :
1304
1332
// Skylake X
1305
- #ifndef NO_AVX512
1306
- return CPUTYPE_SKYLAKEX ;
1307
- #else
1308
- if (support_avx ())
1309
- #ifndef NO_AVX2
1310
- return CPUTYPE_HASWELL ;
1311
- #else
1312
- return CPUTYPE_SANDYBRIDGE ;
1313
- #endif
1333
+ if (support_avx512 ())
1334
+ return CPUTYPE_SKYLAKEX ;
1335
+ if (support_avx2 ())
1336
+ return CPUTYPE_HASWELL ;
1337
+ if (support_avx ())
1338
+ return CPUTYPE_SANDYBRIDGE ;
1314
1339
else
1315
1340
return CPUTYPE_NEHALEM ;
1316
- #endif
1317
1341
case 14 :
1318
1342
// Skylake
1319
- if (support_avx ())
1320
- #ifndef NO_AVX2
1343
+ if (support_avx2 ())
1321
1344
return CPUTYPE_HASWELL ;
1322
- #else
1345
+ if ( support_avx ())
1323
1346
return CPUTYPE_SANDYBRIDGE ;
1324
- #endif
1325
1347
else
1326
1348
return CPUTYPE_NEHALEM ;
1327
1349
case 7 :
1328
1350
// Xeon Phi Knights Landing
1329
- if (support_avx ())
1330
- #ifndef NO_AVX2
1351
+ if (support_avx2 ())
1331
1352
return CPUTYPE_HASWELL ;
1332
- #else
1353
+ if ( support_avx ())
1333
1354
return CPUTYPE_SANDYBRIDGE ;
1334
- #endif
1335
1355
else
1336
1356
return CPUTYPE_NEHALEM ;
1337
1357
case 12 :
@@ -1342,30 +1362,24 @@ int get_cpuname(void){
1342
1362
case 6 :
1343
1363
switch (model ) {
1344
1364
case 6 : // Cannon Lake
1345
- #ifndef NO_AVX512
1346
- return CPUTYPE_SKYLAKEX ;
1347
- #else
1348
- if (support_avx ())
1349
- #ifndef NO_AVX2
1350
- return CPUTYPE_HASWELL ;
1351
- #else
1352
- return CPUTYPE_SANDYBRIDGE ;
1353
- #endif
1365
+ if (support_avx512 ())
1366
+ return CPUTYPE_SKYLAKEX ;
1367
+ if (support_avx2 ())
1368
+ return CPUTYPE_HASWELL ;
1369
+ if (support_avx ())
1370
+ return CPUTYPE_SANDYBRIDGE ;
1354
1371
else
1355
1372
return CPUTYPE_NEHALEM ;
1356
- #endif
1357
1373
}
1358
1374
break ;
1359
1375
case 9 :
1360
1376
case 8 :
1361
1377
switch (model ) {
1362
1378
case 14 : // Kaby Lake
1363
- if (support_avx ())
1364
- #ifndef NO_AVX2
1379
+ if (support_avx2 ())
1365
1380
return CPUTYPE_HASWELL ;
1366
- #else
1381
+ if ( support_avx ())
1367
1382
return CPUTYPE_SANDYBRIDGE ;
1368
- #endif
1369
1383
else
1370
1384
return CPUTYPE_NEHALEM ;
1371
1385
}
@@ -2112,6 +2126,8 @@ void get_cpuconfig(void){
2112
2126
if (features & HAVE_SSE4A ) printf ("#define HAVE_SSE4A\n" );
2113
2127
if (features & HAVE_SSE5 ) printf ("#define HAVE_SSSE5\n" );
2114
2128
if (features & HAVE_AVX ) printf ("#define HAVE_AVX\n" );
2129
+ if (features & HAVE_AVX2 ) printf ("#define HAVE_AVX2\n" );
2130
+ if (features & HAVE_AVX512VL ) printf ("#define HAVE_AVX512VL\n" );
2115
2131
if (features & HAVE_3DNOWEX ) printf ("#define HAVE_3DNOWEX\n" );
2116
2132
if (features & HAVE_3DNOW ) printf ("#define HAVE_3DNOW\n" );
2117
2133
if (features & HAVE_FMA4 ) printf ("#define HAVE_FMA4\n" );
@@ -2180,6 +2196,8 @@ void get_sse(void){
2180
2196
if (features & HAVE_SSE4A ) printf ("HAVE_SSE4A=1\n" );
2181
2197
if (features & HAVE_SSE5 ) printf ("HAVE_SSSE5=1\n" );
2182
2198
if (features & HAVE_AVX ) printf ("HAVE_AVX=1\n" );
2199
+ if (features & HAVE_AVX2 ) printf ("HAVE_AVX2=1\n" );
2200
+ if (features & HAVE_AVX512VL ) printf ("HAVE_AVX512VL=1\n" );
2183
2201
if (features & HAVE_3DNOWEX ) printf ("HAVE_3DNOWEX=1\n" );
2184
2202
if (features & HAVE_3DNOW ) printf ("HAVE_3DNOW=1\n" );
2185
2203
if (features & HAVE_FMA4 ) printf ("HAVE_FMA4=1\n" );
0 commit comments