@@ -60,3 +60,252 @@ const char* ffCPUQualcommCodeToName(uint32_t code)
60
60
default : return NULL ;
61
61
}
62
62
}
63
+
64
+ #if defined(__x86_64__ ) || defined(__i386__ )
65
+
66
+ #include <cpuid.h>
67
+
68
+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
69
+ {
70
+ uint32_t eax = 0 , ebx = 0 , ecx = 0 , edx = 0 ;
71
+ if (__get_cpuid (0x16 , & eax , & ebx , & ecx , & edx ))
72
+ {
73
+ // WARNING: CPUID may report frequencies of efficient cores
74
+ // cpuid returns 0 MHz when hypervisor is enabled
75
+ if (eax ) cpu -> frequencyBase = eax ;
76
+ if (ebx ) cpu -> frequencyMax = ebx ;
77
+ }
78
+
79
+ if (__get_cpuid (1 , & eax , & ebx , & ecx , & edx ))
80
+ {
81
+ // Feature tests (leaf1.ecx, leaf7.ebx)
82
+ bool sse2 = (ecx & bit_SSE2 ) != 0 ;
83
+ bool sse4_2 = (ecx & bit_SSE4_2 ) != 0 ;
84
+ bool pclmul = (ecx & bit_PCLMUL ) != 0 ;
85
+ bool popcnt = (ecx & bit_POPCNT ) != 0 ;
86
+ bool fma = (ecx & bit_FMA ) != 0 ;
87
+ bool osxsave = (ecx & bit_OSXSAVE ) != 0 ;
88
+
89
+ unsigned int eax7 = 0 , ebx7 = 0 , ecx7 = 0 , edx7 = 0 ;
90
+ __get_cpuid_count (7 , 0 , & eax7 , & ebx7 , & ecx7 , & edx7 );
91
+
92
+ bool avx2 = (ebx7 & bit_AVX2 ) != 0 ;
93
+ bool bmi2 = (ebx7 & bit_BMI2 ) != 0 ;
94
+ bool avx512f = (ebx7 & bit_AVX512F ) != 0 ;
95
+ bool avx512bw = (ebx7 & bit_AVX512BW ) != 0 ;
96
+ bool avx512dq = (ebx7 & bit_AVX512DQ ) != 0 ;
97
+
98
+ // OS support for AVX/AVX512: check XGETBV (requires OSXSAVE)
99
+ bool avx_os = false;
100
+ bool avx512_os = false;
101
+ if (osxsave )
102
+ {
103
+ __asm__ __volatile__(
104
+ "xgetbv"
105
+ : "=a" (eax ), "=d" (edx )
106
+ : "c" (0 )
107
+ :
108
+ );
109
+ uint64_t xcr0 = ((uint64_t )edx << 32 ) | eax ;
110
+
111
+ // AVX requires XCR0[1:2] == 11b (XMM and YMM state)
112
+ avx_os = (xcr0 & 0x6ULL ) == 0x6ULL ;
113
+ // AVX512 requires XCR0[7,5,6] etc. common mask 0xE6 (bits 1,2,5,6,7)
114
+ avx512_os = (xcr0 & 0xE6ULL ) == 0xE6ULL ;
115
+ }
116
+
117
+ cpu -> march = "unknown" ;
118
+ if (avx512f && avx512bw && avx512dq && avx512_os ) cpu -> march = "x86_64-v4" ;
119
+ else if (avx2 && fma && bmi2 && avx_os ) cpu -> march = "x86_64-v3" ;
120
+ else if (sse4_2 && popcnt && pclmul ) cpu -> march = "x86_64-v2" ;
121
+ else if (sse2 ) cpu -> march = "x86_64-v1" ;
122
+ }
123
+ }
124
+
125
+ #elif defined(__aarch64__ )
126
+
127
+ // This detection is only approximate: many features are already optional in earlier architecture versions
128
+ // https://developer.arm.com/documentation/109697/2025_06/Feature-descriptions?lang=en
129
+ // https://en.wikipedia.org/wiki/AArch64#ARM-A_(application_architecture)
130
+ // Worth noting: Apple M1 is marked as ARMv8.5-A on Wikipedia, but it lacks BTI (mandatory in v8.5)
131
+
132
+ #ifdef __linux__
133
+ #include "common/io/io.h"
134
+ #include <elf.h>
135
+ #include <asm/hwcap.h>
136
+
137
+ #ifndef HWCAP2_SME
138
+ #define HWCAP2_SME (1UL << 23)
139
+ #endif
140
+ #ifndef HWCAP2_SME2
141
+ #define HWCAP2_SME2 (1UL << 37)
142
+ #endif
143
+ #ifndef HWCAP2_CSSC
144
+ #define HWCAP2_CSSC (1UL << 34)
145
+ #endif
146
+ #ifndef HWCAP2_SME2P1
147
+ #define HWCAP2_SME2P1 (1UL << 38)
148
+ #endif
149
+ #ifndef HWCAP2_MOPS
150
+ #define HWCAP2_MOPS (1UL << 43)
151
+ #endif
152
+ #ifndef HWCAP2_F8E4M3
153
+ #define HWCAP2_F8E4M3 (1UL << 55)
154
+ #endif
155
+ #ifndef HWCAP2_F8E5M2
156
+ #define HWCAP2_F8E5M2 (1UL << 56)
157
+ #endif
158
+ #ifndef HWCAP_CMPBR
159
+ #define HWCAP_CMPBR (1UL << 33)
160
+ #endif
161
+ #ifndef HWCAP_FPRCVT
162
+ #define HWCAP_FPRCVT (1UL << 34)
163
+ #endif
164
+
165
+ void ffCPUDetectByCpuid (FFCPUResult * cpu )
166
+ {
167
+ char buf [PROC_FILE_BUFFSIZ ];
168
+ ssize_t nRead = ffReadFileData ("/proc/self/auxv" , ARRAY_SIZE (buf ), buf );
169
+
170
+ if (nRead < (ssize_t ) sizeof (Elf64_auxv_t )) return ;
171
+
172
+ uint64_t hwcap = 0 , hwcap2 = 0 ;
173
+
174
+ for (Elf64_auxv_t * auxv = (Elf64_auxv_t * )buf ; (char * )auxv < buf + nRead ; ++ auxv )
175
+ {
176
+ if (auxv -> a_type == AT_HWCAP )
177
+ {
178
+ hwcap = auxv -> a_un .a_val ;
179
+ }
180
+ else if (auxv -> a_type == AT_HWCAP2 )
181
+ {
182
+ hwcap2 = auxv -> a_un .a_val ;
183
+ }
184
+ }
185
+
186
+ if (!hwcap ) return ;
187
+
188
+ cpu -> march = "unknown" ;
189
+
190
+ // ARMv8-A
191
+ bool has_fp = (hwcap & HWCAP_FP ) != 0 ;
192
+ bool has_asimd = (hwcap & HWCAP_ASIMD ) != 0 ;
193
+
194
+ // ARMv8.1-A
195
+ bool has_atomics = (hwcap & HWCAP_ATOMICS ) != 0 ; // optional from v8.0
196
+ bool has_crc32 = (hwcap & HWCAP_CRC32 ) != 0 ; // optional from v8.0
197
+ bool has_asimdrdm = (hwcap & HWCAP_ASIMDRDM ) != 0 ; // optional from v8.0
198
+
199
+ // ARMv8.2-A
200
+ bool has_fphp = (hwcap & HWCAP_FPHP ) != 0 ; // optional
201
+ bool has_dcpop = (hwcap & HWCAP_DCPOP ) != 0 ; // DC CVAP, optional from v8.1
202
+
203
+ // ARMv8.3-A
204
+ bool has_paca = (hwcap & HWCAP_PACA ) != 0 ; // optional from v8.2
205
+ bool has_lrcpc = (hwcap & HWCAP_LRCPC ) != 0 ; // optional from v8.2
206
+ bool has_fcma = (hwcap & HWCAP_FCMA ) != 0 ; // optional from v8.2
207
+ bool has_jscvt = (hwcap & HWCAP_JSCVT ) != 0 ; // optional from v8.2
208
+
209
+ // ARMv8.4-A
210
+ bool has_dit = (hwcap & HWCAP_DIT ) != 0 ; // optional from v8.3
211
+ bool has_flagm = (hwcap & HWCAP_FLAGM ) != 0 ; // optional from v8.1
212
+ bool has_ilrcpc = (hwcap & HWCAP_ILRCPC ) != 0 ; // optional from v8.2
213
+
214
+ // ARMv8.5-A
215
+ bool has_bti = (hwcap2 & HWCAP2_BTI ) != 0 ; // optional from v8.4
216
+ bool has_sb = (hwcap & HWCAP_SB ) != 0 ; // optional from v8.0
217
+ bool has_dcpodp = (hwcap2 & HWCAP2_DCPODP ) != 0 ; // optional from v8.1
218
+ bool has_flagm2 = (hwcap2 & HWCAP2_FLAGM2 ) != 0 ; // optional from v8.4
219
+ bool has_frint = (hwcap2 & HWCAP2_FRINT ) != 0 ; // optional from v8.4
220
+
221
+ // ARMv9.0-A
222
+ bool has_sve2 = (hwcap2 & HWCAP2_SVE2 ) != 0 ;
223
+
224
+ // ARMv9.1-A
225
+ // ARMv8.6-A
226
+ bool has_bf16 = (hwcap2 & HWCAP2_BF16 ) != 0 ; // optional from v8.2
227
+ bool has_i8mm = (hwcap2 & HWCAP2_I8MM ) != 0 ; // optional from v8.1
228
+
229
+ // ARMv8.7-A
230
+ bool has_afp = (hwcap2 & HWCAP2_AFP ) != 0 ; // optional from v8.6
231
+
232
+ // ARMv9.2-A
233
+ bool has_sme = (hwcap2 & HWCAP2_SME ) != 0 ;
234
+
235
+ // ARMv9.3-A
236
+ bool has_sme2 = (hwcap2 & HWCAP2_SME2 ) != 0 ; // optional from v9.2
237
+
238
+ // ARMv8.8-A
239
+ bool has_mops = (hwcap2 & HWCAP2_MOPS ) != 0 ; // optional from v8.7
240
+
241
+ // ARMv8.9-A
242
+ bool has_cssc = (hwcap2 & HWCAP2_CSSC ) != 0 ; // optional from v8.7
243
+
244
+ // ARMv9.4-A
245
+ bool has_sme2p1 = (hwcap2 & HWCAP2_SME2P1 ) != 0 ; // optional from v9.2
246
+
247
+ // ARMv9.5-A
248
+ bool has_f8e4m3 = (hwcap2 & HWCAP2_F8E4M3 ) != 0 ; // optional from v9.2
249
+ bool has_f8e5m2 = (hwcap2 & HWCAP2_F8E5M2 ) != 0 ; // optional from v9.2
250
+
251
+ // ARMv9.6-A
252
+ bool has_cmpbr = (hwcap & HWCAP_CMPBR ) != 0 ; // optional from v9.5
253
+ bool has_fprcvt = (hwcap & HWCAP_FPRCVT ) != 0 ; // optional from v9.5
254
+
255
+ if (has_sve2 || has_sme ) {
256
+ // ARMv9
257
+ if (has_cmpbr && has_fprcvt ) {
258
+ cpu -> march = "ARMv9.6-A" ;
259
+ } else if (has_f8e5m2 && has_f8e4m3 ) {
260
+ cpu -> march = "ARMv9.5-A" ;
261
+ } else if (has_sme2p1 ) {
262
+ cpu -> march = "ARMv9.4-A" ;
263
+ } else if (has_sme2 ) {
264
+ cpu -> march = "ARMv9.3-A" ;
265
+ } else if (has_sme ) {
266
+ cpu -> march = "ARMv9.2-A" ;
267
+ } else if (has_i8mm && has_bf16 ) {
268
+ cpu -> march = "ARMv9.1-A" ;
269
+ } else {
270
+ cpu -> march = "ARMv9.0-A" ;
271
+ }
272
+ } else {
273
+ // ARMv8
274
+ if (has_cssc ) {
275
+ cpu -> march = "ARMv8.9-A" ;
276
+ } else if (has_mops ) {
277
+ cpu -> march = "ARMv8.8-A" ;
278
+ } else if (has_afp ) {
279
+ cpu -> march = "ARMv8.7-A" ;
280
+ } else if (has_i8mm && has_bf16 ) {
281
+ cpu -> march = "ARMv8.6-A" ;
282
+ } else if (has_bti && has_sb && has_dcpodp && has_flagm2 && has_frint ) {
283
+ cpu -> march = "ARMv8.5-A" ;
284
+ } else if (has_dit && has_flagm && has_ilrcpc ) {
285
+ cpu -> march = "ARMv8.4-A" ;
286
+ } else if (has_paca && has_lrcpc && has_fcma && has_jscvt ) {
287
+ cpu -> march = "ARMv8.3-A" ;
288
+ } else if (has_fphp && has_dcpop ) {
289
+ cpu -> march = "ARMv8.2-A" ;
290
+ } else if (has_atomics && has_crc32 && has_asimdrdm ) {
291
+ cpu -> march = "ARMv8.1-A" ;
292
+ } else if (has_asimd && has_fp ) {
293
+ cpu -> march = "ARMv8-A" ;
294
+ }
295
+ }
296
+ }
297
+ #else
298
+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
299
+ {
300
+ // Unsupported platform
301
+ }
302
+ #endif // __linux__
303
+
304
+ #else
305
+
306
+ void ffCPUDetectByCpuid (FF_MAYBE_UNUSED FFCPUResult * cpu )
307
+ {
308
+ // Unsupported platform
309
+ }
310
+
311
+ #endif
0 commit comments