@@ -304,9 +304,47 @@ int support_avx(){
304
304
#endif
305
305
}
306
306
307
/*
 * Report whether AVX2 kernels may be selected at runtime.
 *
 * Returns 1 when support_avx() succeeds and CPUID leaf 7 advertises AVX2,
 * and 0 otherwise. Always returns 0 when the library is built with NO_AVX2.
 */
int support_avx2(){
#ifndef NO_AVX2
  /* ecx is zeroed before the call so that sub-leaf 0 is requested
     (assumes cpuid() forwards *ecx as the sub-leaf -- TODO confirm). */
  int eax, ebx, ecx = 0, edx;

  /* AVX2 is unusable without AVX. Note the call: a bare `support_avx`
     would test the function's address, which is never null. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* CPUID.(EAX=07H,ECX=0):EBX bit 5 is the AVX2 feature flag
     (bit 7 of the same register is SMEP, not AVX2). */
  return (ebx & (1 << 5)) != 0;
#else
  return 0;
#endif
}
322
+
323
/*
 * Report whether AVX512 kernels may be selected at runtime.
 *
 * Returns 1 only when support_avx() succeeds and CPUID leaf 7 advertises
 * both AVX2 and AVX512VL; returns 0 otherwise. Always returns 0 when the
 * library is built with NO_AVX512.
 *
 * NOTE(review): this checks CPU feature flags only. Whether the OS has
 * enabled the opmask/ZMM state in XCR0 is not verified here -- confirm
 * whether support_avx() or a caller covers that.
 */
int support_avx512(){
#ifndef NO_AVX512
  /* ecx is zeroed before the call so that sub-leaf 0 is requested
     (assumes cpuid() forwards *ecx as the sub-leaf -- TODO confirm). */
  int eax, ebx, ecx = 0, edx;

  /* A bare `support_avx` would test the function's address; call it. */
  if (!support_avx())
    return 0;

  cpuid(7, &eax, &ebx, &ecx, &edx);

  /* Without AVX2 (EBX bit 5) there is no point in looking further. */
  if ((ebx & (1 << 5)) == 0)
    return 0;

  /* AVX512VL is EBX bit 31. The mask is built as unsigned because
     shifting 1 into the sign bit of an int is undefined behaviour. */
  if (((unsigned int)ebx & (1u << 31)) == 0)
    return 0;

  return 1;
#else
  return 0;
#endif
}
342
+
307
343
/*
 * Warning hook and the messages used when a slower kernel set is chosen.
 * openblas_warning() is defined elsewhere; the visible callers pass
 * FALLBACK_VERBOSE as its first argument and one of the *_FALLBACK strings
 * as the message. NEHALEM_FALLBACK and SANDYBRIDGE_FALLBACK are used by
 * get_coretype(); HASWELL_FALLBACK and BARCELONA_FALLBACK are not referenced
 * in the visible part of this file.
 */
extern void openblas_warning (int verbose , const char * msg );
308
344
#define FALLBACK_VERBOSE 1
309
345
#define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
346
+ #define SANDYBRIDGE_FALLBACK "OpenBLAS : Your OS does not support AVX2 instructions. OpenBLAS is using Sandybridge kernels as a fallback, which may give poorer performance.\n"
347
+ #define HASWELL_FALLBACK "OpenBLAS : Your OS does not support AVX512 instructions. OpenBLAS is using Haswell kernels as a fallback, which may give poorer performance.\n"
310
348
#define BARCELONA_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Barcelona kernels as a fallback, which may give poorer performance.\n"
311
349
312
350
static int get_vendor (void ){
@@ -403,18 +441,24 @@ static gotoblas_t *get_coretype(void){
403
441
}
404
442
//Intel Haswell
405
443
if (model == 12 || model == 15 ) {
406
- if (support_avx ())
444
+ if (support_avx2 ())
407
445
return & gotoblas_HASWELL ;
408
- else {
446
+ if (support_avx ()) {
447
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
448
+ return & gotoblas_SANDYBRIDGE ;
449
+ } else {
409
450
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
410
451
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
411
452
}
412
453
}
413
454
//Intel Broadwell
414
455
if (model == 13 ) {
415
- if (support_avx ())
456
+ if (support_avx2 ())
416
457
return & gotoblas_HASWELL ;
417
- else {
458
+ if (support_avx ()) {
459
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
460
+ return & gotoblas_SANDYBRIDGE ;
461
+ } else {
418
462
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
419
463
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
420
464
}
@@ -424,27 +468,36 @@ static gotoblas_t *get_coretype(void){
424
468
case 4 :
425
469
//Intel Haswell
426
470
if (model == 5 || model == 6 ) {
427
- if (support_avx ())
471
+ if (support_avx2 ())
428
472
return & gotoblas_HASWELL ;
429
- else {
473
+ if (support_avx ()) {
474
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
475
+ return & gotoblas_SANDYBRIDGE ;
476
+ } else {
430
477
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
431
478
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
432
479
}
433
480
}
434
481
//Intel Broadwell
435
482
if (model == 7 || model == 15 ) {
436
- if (support_avx ())
483
+ if (support_avx2 ())
437
484
return & gotoblas_HASWELL ;
438
- else {
485
+ if (support_avx ()) {
486
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
487
+ return & gotoblas_SANDYBRIDGE ;
488
+ } else {
439
489
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
440
490
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
441
491
}
442
492
}
443
493
//Intel Skylake
444
494
if (model == 14 ) {
445
- if (support_avx ())
495
+ if (support_avx2 ())
446
496
return & gotoblas_HASWELL ;
447
- else {
497
+ if (support_avx ()) {
498
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
499
+ return & gotoblas_SANDYBRIDGE ;
500
+ } else {
448
501
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
449
502
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
450
503
}
@@ -457,40 +510,50 @@ static gotoblas_t *get_coretype(void){
457
510
case 5 :
458
511
//Intel Broadwell
459
512
if (model == 6 ) {
460
- if (support_avx ())
513
+ if (support_avx2 ())
461
514
return & gotoblas_HASWELL ;
462
- else {
515
+ if (support_avx ()) {
516
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
517
+ return & gotoblas_SANDYBRIDGE ;
518
+ } else {
463
519
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
464
520
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
465
521
}
466
522
}
467
523
if (model == 5 ) {
468
524
// Intel Skylake X
469
- #ifndef NO_AVX512
470
- return & gotoblas_SKYLAKEX ;
471
- #else
472
- if (support_avx ())
525
+ if (support_avx512 ())
526
+ return & gotoblas_SKYLAKEX ;
527
+ if (support_avx2 ())
473
528
return & gotoblas_HASWELL ;
474
- else {
475
- openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
476
- return & gotoblas_NEHALEM ;
477
- }
478
- #endif
529
+ if (support_avx ()) {
530
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
531
+ return & gotoblas_SANDYBRIDGE ;
532
+ } else {
533
+ openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
534
+ return & gotoblas_NEHALEM ;
535
+ }
479
536
}
480
537
//Intel Skylake
481
538
if (model == 14 ) {
482
- if (support_avx ())
539
+ if (support_avx2 ())
483
540
return & gotoblas_HASWELL ;
484
- else {
541
+ if (support_avx ()) {
542
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
543
+ return & gotoblas_SANDYBRIDGE ;
544
+ } else {
485
545
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
486
546
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
487
547
}
488
548
}
489
549
//Intel Phi Knights Landing
490
550
if (model == 7 ) {
491
- if (support_avx ())
551
+ if (support_avx2 ())
492
552
return & gotoblas_HASWELL ;
493
- else {
553
+ if (support_avx ()) {
554
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
555
+ return & gotoblas_SANDYBRIDGE ;
556
+ } else {
494
557
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
495
558
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
496
559
}
@@ -503,26 +566,26 @@ static gotoblas_t *get_coretype(void){
503
566
case 6 :
504
567
if (model == 6 ) {
505
568
// Cannon Lake
506
- #ifndef NO_AVX512
507
- return & gotoblas_SKYLAKEX ;
508
- #else
509
- if (support_avx ())
510
- #ifndef NO_AVX2
511
- return & gotoblas_HASWELL ;
512
- #else
513
- return & gotoblas_SANDYBRIDGE ;
514
- #endif
515
- else
516
- return & gotoblas_NEHALEM ;
517
- #endif
569
+ if (support_avx2 ())
570
+ return & gotoblas_HASWELL ;
571
+ if (support_avx ()) {
572
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
573
+ return & gotoblas_SANDYBRIDGE ;
574
+ } else {
575
+ openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
576
+ return & gotoblas_NEHALEM ;
577
+ }
518
578
}
519
579
return NULL ;
520
580
case 9 :
521
581
case 8 :
522
582
if (model == 14 ) { // Kaby Lake
523
- if (support_avx ())
583
+ if (support_avx2 ())
524
584
return & gotoblas_HASWELL ;
525
- else {
585
+ if (support_avx ()) {
586
+ openblas_warning (FALLBACK_VERBOSE , SANDYBRIDGE_FALLBACK );
587
+ return & gotoblas_SANDYBRIDGE ;
588
+ } else {
526
589
openblas_warning (FALLBACK_VERBOSE , NEHALEM_FALLBACK );
527
590
return & gotoblas_NEHALEM ; //OS doesn't support AVX. Use old kernels.
528
591
}
0 commit comments