Skip to content

Commit ae1d1f7

Browse files
authored
Query AVX2 and AVX512 capability for runtime cpu selection
1 parent 20d1aad commit ae1d1f7

File tree

1 file changed

+102
-39
lines changed

1 file changed

+102
-39
lines changed

driver/others/dynamic.c

Lines changed: 102 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,47 @@ int support_avx(){
304304
#endif
305305
}
306306

307+
/*
 * Report whether the AVX2 code paths may be selected at run time.
 *
 * Returns 1 when support_avx() reports success and CPUID leaf 7 advertises
 * the AVX2 feature flag; returns 0 otherwise. When the library is built
 * with NO_AVX2 the function always returns 0.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* ecx is pre-set to 0 as in the original code; presumably this is meant as
     the sub-leaf index for leaf 7 -- confirm against the cpuid() helper. */
  int eax, ebx, ecx=0, edx;

  /* support_avx must be *called*: the bare function name is never a null
     pointer, so "!support_avx" could never take this early exit. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=07H,ECX=0):EBX bit 5 is the AVX2 flag (bit 7 is SMEP). */
  if ((ebx & (1<<5)) != 0)
    return 1;
  return 0;
#else
  return 0;
#endif
}
322+
323+
/*
 * Report whether the AVX512 code paths may be selected at run time.
 *
 * Returns 1 only when all of the following hold:
 *   - support_avx() reports success,
 *   - CPUID leaf 7 advertises AVX2 (EBX bit 5),
 *   - CPUID leaf 7 advertises AVX512VL (EBX bit 31).
 * Returns 0 otherwise, and always returns 0 when built with NO_AVX512.
 *
 * NOTE(review): this only inspects CPUID feature flags, as the original did;
 * whether the OS has enabled the AVX-512 register state is not checked here.
 */
int support_avx512(){
#ifndef NO_AVX512
  int eax, ebx, ecx=0, edx;

  /* support_avx must be *called*: the bare function name is never a null
     pointer, so "!support_avx" could never take this early exit. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* No AVX2 flag (EBX bit 5): do not use AVX512 kernels either. The previous
     test compared a masked bit against 1, which could never match, and did
     not stop the later check from overriding the result. */
  if ((ebx & (1<<5)) == 0)
    return 0;

  /* AVX512VL is EBX bit 31; use an unsigned constant because shifting a
     signed 1 into the sign bit is undefined behaviour. */
  if (((unsigned int)ebx & (1u<<31)) != 0)
    return 1;
  return 0;
#else
  return 0;
#endif
}
342+
307343
// Warning reporter defined elsewhere; get_coretype() calls it with
// FALLBACK_VERBOSE and one of the *_FALLBACK messages below.
extern void openblas_warning(int verbose, const char * msg);
308344
// Value passed as the "verbose" argument for every fallback warning.
#define FALLBACK_VERBOSE 1
309345
// Messages emitted when an older kernel set than the CPU model would normally
// get is selected. Each names the missing instruction set and the kernel
// family used instead.
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
346+
#define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
347+
// NOTE(review): HASWELL_FALLBACK is not referenced in the hunks visible here.
#define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
310348
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
311349

312350
static int get_vendor(void){
@@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
403441
}
404442
//Intel Haswell
405443
if (model == 12 || model == 15) {
406-
if(support_avx())
444+
if(support_avx2())
407445
return &gotoblas_HASWELL;
408-
else{
446+
if(support_avx()) {
447+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
448+
return &gotoblas_SANDYBRIDGE;
449+
} else {
409450
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
410451
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
411452
}
412453
}
413454
//Intel Broadwell
414455
if (model == 13) {
415-
if(support_avx())
456+
if(support_avx2())
416457
return &gotoblas_HASWELL;
417-
else{
458+
if(support_avx()) {
459+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
460+
return &gotoblas_SANDYBRIDGE;
461+
} else {
418462
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
419463
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
420464
}
@@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
424468
case 4:
425469
//Intel Haswell
426470
if (model == 5 || model == 6) {
427-
if(support_avx())
471+
if(support_avx2())
428472
return &gotoblas_HASWELL;
429-
else{
473+
if(support_avx()) {
474+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
475+
return &gotoblas_SANDYBRIDGE;
476+
} else {
430477
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
431478
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
432479
}
433480
}
434481
//Intel Broadwell
435482
if (model == 7 || model == 15) {
436-
if(support_avx())
483+
if(support_avx2())
437484
return &gotoblas_HASWELL;
438-
else{
485+
if(support_avx()) {
486+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
487+
return &gotoblas_SANDYBRIDGE;
488+
} else {
439489
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
440490
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
441491
}
442492
}
443493
//Intel Skylake
444494
if (model == 14) {
445-
if(support_avx())
495+
if(support_avx2())
446496
return &gotoblas_HASWELL;
447-
else{
497+
if(support_avx()) {
498+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
499+
return &gotoblas_SANDYBRIDGE;
500+
} else {
448501
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
449502
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
450503
}
@@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
457510
case 5:
458511
//Intel Broadwell
459512
if (model == 6) {
460-
if(support_avx())
513+
if(support_avx2())
461514
return &gotoblas_HASWELL;
462-
else{
515+
if(support_avx()) {
516+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
517+
return &gotoblas_SANDYBRIDGE;
518+
} else {
463519
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
464520
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
465521
}
466522
}
467523
if (model == 5) {
468524
// Intel Skylake X
469-
#ifndef NO_AVX512
470-
return &gotoblas_SKYLAKEX;
471-
#else
472-
if(support_avx())
525+
if (support_avx512())
526+
return &gotoblas_SKYLAKEX;
527+
if(support_avx2())
473528
return &gotoblas_HASWELL;
474-
else {
475-
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
476-
return &gotoblas_NEHALEM;
477-
}
478-
#endif
529+
if(support_avx()) {
530+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
531+
return &gotoblas_SANDYBRIDGE;
532+
} else {
533+
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
534+
return &gotoblas_NEHALEM;
535+
}
479536
}
480537
//Intel Skylake
481538
if (model == 14) {
482-
if(support_avx())
539+
if(support_avx2())
483540
return &gotoblas_HASWELL;
484-
else{
541+
if(support_avx()) {
542+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
543+
return &gotoblas_SANDYBRIDGE;
544+
} else {
485545
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
486546
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
487547
}
488548
}
489549
//Intel Phi Knights Landing
490550
if (model == 7) {
491-
if(support_avx())
551+
if(support_avx2())
492552
return &gotoblas_HASWELL;
493-
else{
553+
if(support_avx()) {
554+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
555+
return &gotoblas_SANDYBRIDGE;
556+
} else {
494557
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
495558
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
496559
}
@@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
503566
case 6:
504567
if (model == 6) {
505568
// Cannon Lake
506-
#ifndef NO_AVX512
507-
return &gotoblas_SKYLAKEX;
508-
#else
509-
if(support_avx())
510-
#ifndef NO_AVX2
511-
return &gotoblas_HASWELL;
512-
#else
513-
return &gotoblas_SANDYBRIDGE;
514-
#endif
515-
else
516-
return &gotoblas_NEHALEM;
517-
#endif
569+
if(support_avx2())
570+
return &gotoblas_HASWELL;
571+
if(support_avx()) {
572+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
573+
return &gotoblas_SANDYBRIDGE;
574+
} else {
575+
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
576+
return &gotoblas_NEHALEM;
577+
}
518578
}
519579
return NULL;
520580
case 9:
521581
case 8:
522582
if (model == 14 ) { // Kaby Lake
523-
if(support_avx())
583+
if(support_avx2())
524584
return &gotoblas_HASWELL;
525-
else{
585+
if(support_avx()) {
586+
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
587+
return &gotoblas_SANDYBRIDGE;
588+
} else {
526589
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
527590
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
528591
}

0 commit comments

Comments
 (0)