@@ -422,14 +422,128 @@ void ffCPUDetectByCpuid(FFCPUResult* cpu)
422
422
423
423
#undef HAS_CAP
424
424
}
425
+ #elif _WIN32
426
+ #include <processthreadsapi.h>
427
+
428
// Processor-feature identifiers for IsProcessorFeaturePresent() that the
// winnt.h shipped with MinGW-w64 does not provide.
#define PF_ARM_LSE2_AVAILABLE                       62
#define PF_RESERVED_FEATURE                         63
#define PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE          64
#define PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE        65
#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE      66
#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE      67
#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE      68
#define PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE     69
#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE           70
#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE          71
#define PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE        72
#define PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE        73
#define PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE       74
#define PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE  75
#define PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE    76
#define PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE    77
#define PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE    78
#define PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE    79
#define PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE    80
#define PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE     81
#define PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE     82
#define PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE    83
#define PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE    84
#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE    85
#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE    86
#define PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE     87
#define PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE      88
456
+
457
+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
458
+ {
459
+ // ARMv8-A
460
+ bool has_vfp = IsProcessorFeaturePresent (PF_ARM_VFP_32_REGISTERS_AVAILABLE ); // Implies basic FP support
461
+ bool has_neon = IsProcessorFeaturePresent (PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ); // NEON (ASIMD)
462
+
463
+ // ARMv8.1-A
464
+ bool has_atomics = IsProcessorFeaturePresent (PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE ); // LSE atomics
465
+ bool has_crc32 = IsProcessorFeaturePresent (PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE ); // CRC32
466
+
467
+ // ARMv8.2-A
468
+ bool has_fp16 = IsProcessorFeaturePresent (PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE ); // Half-precision FP
469
+
470
+ // ARMv8.3-A
471
+ bool has_lrcpc = IsProcessorFeaturePresent (PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE ); // LDAPR/LR with RCPC semantics
472
+ bool has_jscvt = IsProcessorFeaturePresent (PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE ); // FJCVTZS
473
+
474
+ // ARMv8.4-A
475
+ // My CPU (Apple M1 Pro in VM) does support LSE2, but Windows doesn't detect it for some reason
476
+ // bool has_lse2 = IsProcessorFeaturePresent(PF_ARM_LSE2_AVAILABLE); // Large System Extensions version 2, optional from v8.2
477
+ bool has_dp = IsProcessorFeaturePresent (PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE ); // DotProd, optional from v8.1 (*)
478
+
479
+ // ARMv9.0-A
480
+ bool has_sve2 = IsProcessorFeaturePresent (PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE ); // SVE2
481
+
482
+ // ARMv9.1-A
483
+ // ARMv8.6-A
484
+ bool has_bf16 = IsProcessorFeaturePresent (PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE ); // BF16, optional from v8.2
485
+ bool has_i8mm = IsProcessorFeaturePresent (PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE ); // Int8 matrix multiply, optional from v8.2
486
+
487
+ // ARMv8.7-A
488
+ bool has_ebf16 = IsProcessorFeaturePresent (PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE ); // Extended BFloat16 behaviors, optional from v8.2
489
+
490
+ // ARMv9.2-A
491
+ bool has_sme = IsProcessorFeaturePresent (PF_ARM_SME_INSTRUCTIONS_AVAILABLE ); // SME
492
+
493
+ // ARMv9.3-A
494
+ bool has_sme2 = IsProcessorFeaturePresent (PF_ARM_SME2_INSTRUCTIONS_AVAILABLE ); // SME2
495
+
496
+ // ARMv9.4-A
497
+ bool has_sme2p1 = IsProcessorFeaturePresent (PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE ); // SME2.1
498
+
499
+
500
+ if (has_sve2 || has_sme )
501
+ {
502
+ // ARMv9 family
503
+ if (has_sme2p1 ) {
504
+ cpu -> march = "ARMv9.4-A" ;
505
+ } else if (has_sme2 ) {
506
+ cpu -> march = "ARMv9.3-A" ;
507
+ } else if (has_sme ) {
508
+ cpu -> march = "ARMv9.2-A" ;
509
+ } else if (has_i8mm && has_bf16 ) {
510
+ cpu -> march = "ARMv9.1-A" ;
511
+ } else {
512
+ cpu -> march = "ARMv9.0-A" ;
513
+ }
514
+ }
515
+ else
516
+ {
517
+ // ARMv8 family
518
+ if (has_ebf16 ) {
519
+ cpu -> march = "ARMv8.7-A" ;
520
+ } else if (has_i8mm && has_bf16 ) {
521
+ cpu -> march = "ARMv8.6-A" ;
522
+ } else if (has_dp ) {
523
+ cpu -> march = "ARMv8.4-A" ;
524
+ } else if (has_lrcpc && has_jscvt ) {
525
+ cpu -> march = "ARMv8.3-A" ;
526
+ } else if (has_fp16 ) {
527
+ cpu -> march = "ARMv8.2-A" ;
528
+ } else if (has_atomics && has_crc32 ) {
529
+ cpu -> march = "ARMv8.1-A" ;
530
+ } else if (has_neon && has_vfp ) {
531
+ cpu -> march = "ARMv8-A" ;
532
+ }
533
+ }
534
+ }
425
535
#else
426
- #endif // __linux__
536
+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
537
+ {
538
+ // Unsupported system
539
+ }
540
+ #endif
427
541
428
542
#else
429
543
430
544
/**
 * Fallback used when the target CPU architecture has no detection routine.
 * Intentionally a no-op: `cpu` is neither read nor written.
 */
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
{
    // Unsupported architecture
}
434
548
435
549
#endif
0 commit comments