Skip to content

Commit 00f163b

Browse files
committed
CPU (Windows): detects march on WoA
Tested in VM
1 parent 68bbb29 commit 00f163b

File tree

1 file changed

+116
-2
lines changed

1 file changed

+116
-2
lines changed

src/detection/cpu/cpu.c

Lines changed: 116 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,14 +422,128 @@ void ffCPUDetectByCpuid(FFCPUResult* cpu)
422422

423423
#undef HAS_CAP
424424
}
425+
#elif _WIN32
426+
#include <processthreadsapi.h>
427+
428+
// Processor-feature IDs (arguments for IsProcessorFeaturePresent) that are
// missing from the winnt.h shipped with MinGW-w64.
// Every ID is guarded with #ifndef so that a toolchain header which already
// provides it takes precedence and no macro is ever redefined here.
#ifndef PF_ARM_LSE2_AVAILABLE
#define PF_ARM_LSE2_AVAILABLE 62
#endif
#ifndef PF_RESERVED_FEATURE
#define PF_RESERVED_FEATURE 63
#endif
#ifndef PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SHA3_INSTRUCTIONS_AVAILABLE 64
#endif
#ifndef PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SHA512_INSTRUCTIONS_AVAILABLE 65
#endif
#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
#endif
#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
#endif
#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
#endif
#ifndef PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE 69
#endif
#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
#endif
#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
#endif
#ifndef PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE 72
#endif
#ifndef PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME2_2_INSTRUCTIONS_AVAILABLE 73
#endif
#ifndef PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_AES_INSTRUCTIONS_AVAILABLE 74
#endif
#ifndef PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SBITPERM_INSTRUCTIONS_AVAILABLE 75
#endif
#ifndef PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SF8MM4_INSTRUCTIONS_AVAILABLE 76
#endif
#ifndef PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SF8MM8_INSTRUCTIONS_AVAILABLE 77
#endif
#ifndef PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SF8DP2_INSTRUCTIONS_AVAILABLE 78
#endif
#ifndef PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SF8DP4_INSTRUCTIONS_AVAILABLE 79
#endif
#ifndef PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_SF8FMA_INSTRUCTIONS_AVAILABLE 80
#endif
#ifndef PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_F8F32_INSTRUCTIONS_AVAILABLE 81
#endif
#ifndef PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_F8F16_INSTRUCTIONS_AVAILABLE 82
#endif
#ifndef PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_F16F16_INSTRUCTIONS_AVAILABLE 83
#endif
#ifndef PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_B16B16_INSTRUCTIONS_AVAILABLE 84
#endif
#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
#endif
#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
#endif
#ifndef PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_LUTv2_INSTRUCTIONS_AVAILABLE 87
#endif
#ifndef PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SME_FA64_INSTRUCTIONS_AVAILABLE 88
#endif
456+
457+
/**
 * Windows on ARM: derives an ARM architecture revision name from the flags
 * returned by IsProcessorFeaturePresent() and stores it in cpu->march.
 *
 * The newest revision whose marker feature(s) are reported wins. When no
 * known combination matches, cpu->march is left untouched.
 */
void ffCPUDetectByCpuid(FFCPUResult* cpu)
{
    // --- Feature queries, grouped by the revision they are used as a marker for ---

    // ARMv8-A: basic FP support and NEON (ASIMD)
    const bool fpRegs = IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
    const bool neon = IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE);

    // ARMv8.1-A: LSE atomics and CRC32
    const bool lse = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
    const bool crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);

    // ARMv8.2-A: half-precision floating point
    const bool fp16 = IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE);

    // ARMv8.3-A: RCPC load-acquire (LDAPR) and FJCVTZS
    const bool rcpc = IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE);
    const bool jscvt = IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE);

    // ARMv8.4-A: DotProd is used as the marker. PF_ARM_LSE2_AVAILABLE is
    // deliberately not queried: the original author observed that Windows did
    // not report LSE2 on an Apple M1 Pro (in a VM) although the CPU supports it.
    const bool dotProd = IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE);

    // ARMv9.0-A: SVE2
    const bool sve2 = IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);

    // ARMv8.6-A / ARMv9.1-A: BF16 and Int8 matrix multiply
    const bool bf16 = IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE);
    const bool i8mm = IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE);

    // ARMv8.7-A: extended BFloat16 behaviors
    const bool ebf16 = IsProcessorFeaturePresent(PF_ARM_V86_EBF16_INSTRUCTIONS_AVAILABLE);

    // ARMv9.2-A / ARMv9.3-A / ARMv9.4-A: SME, SME2, SME2.1
    const bool sme = IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE);
    const bool sme2 = IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE);
    const bool sme2_1 = IsProcessorFeaturePresent(PF_ARM_SME2_1_INSTRUCTIONS_AVAILABLE);

    // Shared marker of the v8.6 / v9.1 level
    const bool matMulAndBf16 = i8mm && bf16;

    // --- Classification, newest revision first ---

    if (sve2 || sme)
    {
        // ARMv9 family: a revision is always assigned here
        cpu->march = sme2_1        ? "ARMv9.4-A"
                   : sme2          ? "ARMv9.3-A"
                   : sme           ? "ARMv9.2-A"
                   : matMulAndBf16 ? "ARMv9.1-A"
                                   : "ARMv9.0-A";
        return;
    }

    // ARMv8 family: cpu->march stays as it was if nothing below matches
    if (ebf16)
    {
        cpu->march = "ARMv8.7-A";
    }
    else if (matMulAndBf16)
    {
        cpu->march = "ARMv8.6-A";
    }
    else if (dotProd)
    {
        cpu->march = "ARMv8.4-A";
    }
    else if (rcpc && jscvt)
    {
        cpu->march = "ARMv8.3-A";
    }
    else if (fp16)
    {
        cpu->march = "ARMv8.2-A";
    }
    else if (lse && crc32)
    {
        cpu->march = "ARMv8.1-A";
    }
    else if (neon && fpRegs)
    {
        cpu->march = "ARMv8-A";
    }
}
425535
#else
426-
#endif // __linux__
536+
/**
 * Fallback used when none of the preceding system-specific implementations
 * is compiled in. Intentionally a no-op: cpu is neither read nor modified.
 */
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
{
    // Unsupported system
}
540+
#endif
427541

428542
#else
429543

430544
/**
 * Fallback used when the target CPU architecture has no detection code.
 * Intentionally a no-op: cpu is neither read nor modified.
 */
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
{
    // Unsupported architecture
}
434548

435549
#endif

0 commit comments

Comments
 (0)