@@ -15,6 +15,7 @@ typedef struct FFCPUResult
15
15
{
16
16
FFstrbuf name ;
17
17
FFstrbuf vendor ;
18
+ const char * march ; // Microarchitecture
18
19
19
20
uint16_t packages ;
20
21
uint16_t coresPhysical ;
@@ -37,22 +38,66 @@ const char* ffCPUQualcommCodeToName(uint32_t code);
37
38
38
39
#include <cpuid.h>
39
40
40
- // WARNING: CPUID may report frequencies of efficient cores
41
- inline static const char * ffCPUDetectSpeedByCpuid (FFCPUResult * cpu )
41
+ inline static void ffCPUDetectByCpuid (FFCPUResult * cpu )
42
42
{
43
- uint32_t base = 0 , max = 0 , bus = 0 , unused = 0 ;
44
- if (!__get_cpuid (0x16 , & base , & max , & bus , & unused ))
45
- return "Unsupported instruction" ;
46
-
47
- // cpuid returns 0 MHz when hyper-v is enabled
48
- if (base ) cpu -> frequencyBase = base ;
49
- if (max ) cpu -> frequencyMax = max ;
50
- return NULL ;
43
+ uint32_t eax = 0 , ebx = 0 , ecx = 0 , edx = 0 ;
44
+ if (__get_cpuid (0x16 , & eax , & ebx , & ecx , & edx ))
45
+ {
46
+ // WARNING: CPUID may report frequencies of efficient cores
47
+ // cpuid returns 0 MHz when hypervisor is enabled
48
+ if (eax ) cpu -> frequencyBase = eax ;
49
+ if (ebx ) cpu -> frequencyMax = ebx ;
50
+ }
51
+
52
+ if (__get_cpuid (1 , & eax , & ebx , & ecx , & edx ))
53
+ {
54
+ // Feature tests (leaf1.ecx, leaf7.ebx)
55
+ bool sse2 = (ecx & bit_SSE2 ) != 0 ;
56
+ bool sse4_2 = (ecx & bit_SSE4_2 ) != 0 ;
57
+ bool pclmul = (ecx & bit_PCLMUL ) != 0 ;
58
+ bool popcnt = (ecx & bit_POPCNT ) != 0 ;
59
+ bool fma = (ecx & bit_FMA ) != 0 ;
60
+ bool osxsave = (ecx & bit_OSXSAVE ) != 0 ;
61
+
62
+ unsigned int eax7 = 0 , ebx7 = 0 , ecx7 = 0 , edx7 = 0 ;
63
+ __get_cpuid_count (7 , 0 , & eax7 , & ebx7 , & ecx7 , & edx7 );
64
+
65
+ bool avx2 = (ebx7 & bit_AVX2 ) != 0 ;
66
+ bool bmi2 = (ebx7 & bit_BMI2 ) != 0 ;
67
+ bool avx512f = (ebx7 & bit_AVX512F ) != 0 ;
68
+ bool avx512bw = (ebx7 & bit_AVX512BW ) != 0 ;
69
+ bool avx512dq = (ebx7 & bit_AVX512DQ ) != 0 ;
70
+
71
+ // OS support for AVX/AVX512: check XGETBV (requires OSXSAVE)
72
+ bool avx_os = false;
73
+ bool avx512_os = false;
74
+ if (osxsave )
75
+ {
76
+ __asm__ __volatile__(
77
+ "xgetbv"
78
+ : "=a" (eax ), "=d" (edx )
79
+ : "c" (0 )
80
+ :
81
+ );
82
+ uint64_t xcr0 = ((uint64_t )edx << 32 ) | eax ;
83
+
84
+ // AVX requires XCR0[1:2] == 11b (XMM and YMM state)
85
+ avx_os = (xcr0 & 0x6ULL ) == 0x6ULL ;
86
+ // AVX512 requires XCR0[7,5,6] etc. common mask 0xE6 (bits 1,2,5,6,7)
87
+ avx512_os = (xcr0 & 0xE6ULL ) == 0xE6ULL ;
88
+ }
89
+
90
+ cpu -> march = "unknown" ;
91
+ if (avx512f && avx512bw && avx512dq && avx512_os ) cpu -> march = "x86_64-v4" ;
92
+ else if (avx2 && fma && bmi2 && avx_os ) cpu -> march = "x86_64-v3" ;
93
+ else if (sse4_2 && popcnt && pclmul ) cpu -> march = "x86_64-v2" ;
94
+ else if (sse2 ) cpu -> march = "x86_64-v1" ;
95
+ }
51
96
}
52
97
53
98
#else
54
99
55
- inline static const char * ffCPUDetectSpeedByCpuid ( FF_MAYBE_UNUSED FFCPUResult * cpu )
100
+ inline static void ffCPUDetectByCpuid ( FFCPUResult * cpu )
56
101
{
57
102
return "Unsupported platform" ;
58
103
}
0 commit comments