Skip to content

Commit 68bbb29

Browse files
committed
CPU (macOS): detects march on Apple Silicon
1 parent dd1a799 commit 68bbb29

File tree

2 files changed

+128
-4
lines changed

2 files changed

+128
-4
lines changed

src/detection/cpu/cpu.c

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -294,11 +294,135 @@ void ffCPUDetectByCpuid(FFCPUResult* cpu)
294294
}
295295
}
296296
}
297-
#else
298-
void ffCPUDetectByCpuid(FF_MAYBE_UNUSED FFCPUResult* cpu)
297+
#elif __APPLE__
298+
#include <sys/sysctl.h>
299+
#include <arm/cpu_capabilities_public.h>
300+
301+
// Fallback bit positions for capability constants used below to index the
// `hw.optional.arm.caps` mask. Each one is defined only when the included
// <arm/cpu_capabilities_public.h> did not already provide it
// (presumably older SDK headers lack them -- TODO confirm).
#ifndef CAP_BIT_AdvSIMD
302+
#define CAP_BIT_AdvSIMD 49
303+
#endif
304+
#ifndef CAP_BIT_AdvSIMD_HPFPCvt
305+
#define CAP_BIT_AdvSIMD_HPFPCvt 50
306+
#endif
307+
#ifndef CAP_BIT_FEAT_CRC32
308+
#define CAP_BIT_FEAT_CRC32 51
309+
#endif
310+
#ifndef CAP_BIT_FEAT_HBC
311+
#define CAP_BIT_FEAT_HBC 64
312+
#endif
313+
#ifndef CAP_BIT_FEAT_CSSC
314+
#define CAP_BIT_FEAT_CSSC 67
315+
#endif
316+
317+
/**
 * Estimates the ARM architecture level of an Apple Silicon CPU.
 *
 * Reads the `hw.optional.arm.caps` sysctl bitmask and stores in `cpu->march`
 * the name of the highest architecture revision whose marker features are all
 * reported as present. This is a heuristic based on feature bits, not a value
 * read from an architecture-version register.
 *
 * @param cpu Result structure to update. Only `march` is written; it always
 *            receives a string literal ("unknown" when no rule matches).
 *            If the sysctl call fails, `cpu` is left untouched.
 */
void ffCPUDetectByCpuid(FFCPUResult* cpu)
{
    // 80-bit capability mask, split into two 64-bit values.
    // Zero-initialised so that any bits the kernel does not fill in read as "absent".
    uint64_t caps[2] = {0};
    size_t size = sizeof(caps);

    if (sysctlbyname("hw.optional.arm.caps", caps, &size, NULL, 0) != 0) return;

    // Tests a single bit of the mask: bits 0-63 live in caps[0], bits 64 and up in caps[1].
    // Evaluates to 0 or 1.
    #define FF_HAS_CAP(bit) \
        (((bit) < 64) ? ((caps[0] >> (bit)) & 1ULL) : ((caps[1] >> ((bit) - 64U)) & 1ULL))

    cpu->march = "unknown";

    // ARMv8-A
    bool has_fp = FF_HAS_CAP(CAP_BIT_AdvSIMD_HPFPCvt); // AdvSIMD/FP half-precision conversion; used here as the "has FP" marker
    bool has_asimd = FF_HAS_CAP(CAP_BIT_AdvSIMD); // Advanced SIMD (NEON)

    // ARMv8.1-A
    bool has_lse = FF_HAS_CAP(CAP_BIT_FEAT_LSE); // Large System Extensions (atomics)
    bool has_crc32 = FF_HAS_CAP(CAP_BIT_FEAT_CRC32); // CRC32 instructions
    bool has_rdm = FF_HAS_CAP(CAP_BIT_FEAT_RDM); // AdvSIMD rounding double multiply accumulate

    // ARMv8.2-A
    bool has_fp16 = FF_HAS_CAP(CAP_BIT_FEAT_FP16); // Half-precision FP data processing
    bool has_dpb = FF_HAS_CAP(CAP_BIT_FEAT_DPB); // DC CVAP (clean to point of persistence)

    // ARMv8.3-A
    bool has_pauth = FF_HAS_CAP(CAP_BIT_FEAT_PAuth); // Pointer Authentication (PAC)
    bool has_lrcpc = FF_HAS_CAP(CAP_BIT_FEAT_LRCPC); // Load-acquire RCpc instructions (LDAPR)
    bool has_fcma = FF_HAS_CAP(CAP_BIT_FEAT_FCMA); // Floating-point complex number multiply-add
    bool has_jscvt = FF_HAS_CAP(CAP_BIT_FEAT_JSCVT); // JavaScript-style conversion (FJCVTZS)

    // ARMv8.4-A
    bool has_lse2 = FF_HAS_CAP(CAP_BIT_FEAT_LSE2); // Large System Extensions version 2
    bool has_dit = FF_HAS_CAP(CAP_BIT_FEAT_DIT); // Data Independent Timing
    bool has_flagm = FF_HAS_CAP(CAP_BIT_FEAT_FlagM); // Condition flag manipulation (CFINV, RMIF, SETF8/SETF16)
    bool has_lrcpc2 = FF_HAS_CAP(CAP_BIT_FEAT_LRCPC2); // Load-acquire RCpc version 2 (LDAPUR/STLUR)

    // ARMv8.5-A
    bool has_bti = FF_HAS_CAP(CAP_BIT_FEAT_BTI); // Branch Target Identification
    bool has_sb = FF_HAS_CAP(CAP_BIT_FEAT_SB); // Speculation Barrier
    bool has_dpb2 = FF_HAS_CAP(CAP_BIT_FEAT_DPB2); // DC CVADP (clean to point of deep persistence)
    bool has_flagm2 = FF_HAS_CAP(CAP_BIT_FEAT_FlagM2); // Enhanced flag manipulation (AXFLAG/XAFLAG)
    bool has_frintts = FF_HAS_CAP(CAP_BIT_FEAT_FRINTTS); // Floating-point round-to-integer instructions (FRINT32x/FRINT64x)

    // ARMv9.0-A
    // SVE2 has no bit that is consulted here, so it is hard-wired to "absent".
    // As a result the ARMv9 branch below is only entered when SME is reported.
    bool has_sve2 = false;

    // ARMv8.6-A (the same pair is also used to tell ARMv9.1-A from ARMv9.0-A)
    bool has_bf16 = FF_HAS_CAP(CAP_BIT_FEAT_BF16); // BFloat16 instructions
    bool has_i8mm = FF_HAS_CAP(CAP_BIT_FEAT_I8MM); // Int8 matrix multiply

    // ARMv8.7-A
    bool has_afp = FF_HAS_CAP(CAP_BIT_FEAT_AFP); // Alternate floating-point behavior

    // ARMv9.2-A
    bool has_sme = FF_HAS_CAP(CAP_BIT_FEAT_SME); // Scalable Matrix Extension

    // ARMv9.3-A
    bool has_sme2 = FF_HAS_CAP(CAP_BIT_FEAT_SME2); // Scalable Matrix Extension version 2

    // ARMv8.8-A
    bool has_hbc = FF_HAS_CAP(CAP_BIT_FEAT_HBC); // Hinted conditional branches

    // ARMv8.9-A
    bool has_cssc = FF_HAS_CAP(CAP_BIT_FEAT_CSSC); // Common Short Sequence Compression instructions

    // ARMv9.4-A+ are not exposed yet

    // Pick the newest revision first; each rule requires only that revision's
    // own marker features, not those of the older revisions.
    if (has_sve2 || has_sme) {
        // ARMv9 family
        if (has_sme2) {
            cpu->march = "ARMv9.3-A";
        } else if (has_sme) {
            cpu->march = "ARMv9.2-A";
        } else if (has_i8mm && has_bf16) {
            cpu->march = "ARMv9.1-A";
        } else {
            cpu->march = "ARMv9.0-A";
        }
    } else {
        // ARMv8 family
        if (has_cssc) {
            cpu->march = "ARMv8.9-A";
        } else if (has_hbc) {
            cpu->march = "ARMv8.8-A";
        } else if (has_afp) {
            cpu->march = "ARMv8.7-A";
        } else if (has_i8mm && has_bf16) {
            cpu->march = "ARMv8.6-A";
        } else if (has_bti && has_sb && has_dpb2 && has_flagm2 && has_frintts) {
            cpu->march = "ARMv8.5-A";
        } else if (has_lse2 && has_dit && has_flagm && has_lrcpc2) {
            cpu->march = "ARMv8.4-A";
        } else if (has_pauth && has_lrcpc && has_fcma && has_jscvt) {
            cpu->march = "ARMv8.3-A";
        } else if (has_fp16 && has_dpb) {
            cpu->march = "ARMv8.2-A";
        } else if (has_lse && has_crc32 && has_rdm) {
            cpu->march = "ARMv8.1-A";
        } else if (has_asimd && has_fp) {
            cpu->march = "ARMv8-A";
        }
    }

    // Must name the macro defined above; undefining a different name would
    // silently leak FF_HAS_CAP into the rest of the translation unit.
    #undef FF_HAS_CAP
}
425+
#else
302426
#endif // __linux__
303427

304428
#else

src/detection/cpu/cpu_apple.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ static const char* detectFrequency(FFCPUResult* cpu)
7272
#else
7373
static const char* detectFrequency(FFCPUResult* cpu)
7474
{
75-
ffCPUDetectByCpuid(cpu);
7675
cpu->frequencyBase = (uint32_t) (ffSysctlGetInt64("hw.cpufrequency", 0) / 1000 / 1000);
7776
cpu->frequencyMax = (uint32_t) (ffSysctlGetInt64("hw.cpufrequency_max", 0) / 1000 / 1000);
7877
if(cpu->frequencyBase == 0)
@@ -127,6 +126,7 @@ const char* ffDetectCPUImpl(const FFCPUOptions* options, FFCPUResult* cpu)
127126
if(cpu->coresOnline == 1)
128127
cpu->coresOnline = (uint16_t) ffSysctlGetInt("hw.activecpu", 1);
129128

129+
ffCPUDetectByCpuid(cpu);
130130
detectFrequency(cpu);
131131
if (options->showPeCoreCount) detectCoreCount(cpu);
132132

0 commit comments

Comments
 (0)