|
| 1 | +/* |
| 2 | + * Naive CPU SIMD features detection. |
| 3 | + * |
| 4 | + * See Modules/blake2module.c. |
| 5 | + */ |
| 6 | + |
| 7 | +#include "Python.h" |
| 8 | +#include "pycore_cpuinfo.h" |
| 9 | + |
| 10 | +#include <stdbool.h> |
| 11 | + |
| 12 | +#if defined(__x86_64__) && defined(__GNUC__) |
| 13 | +#include <cpuid.h> |
| 14 | +#elif defined(_M_X64) |
| 15 | +#include <intrin.h> |
| 16 | +#endif |
| 17 | + |
| 18 | +// AVX2 cannot be compiled on macOS ARM64 (although it can be on x86_64). |
| 19 | +// However, autoconf incorrectly assumes that it can when building a |
| 20 | +// universal2 binary, so we disable all AVX-related instructions there. |
| 21 | +#if defined(__APPLE__) && defined(__arm64__) |
| 22 | +# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS |
| 23 | +# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS |
| 24 | +# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS |
| 25 | +#endif |
| 26 | + |
/*
 * CPUID feature-bit masks.
 *
 * Naming scheme: <REGISTER><LEAF>_<FEATURE>. For instance, EDX1_SSE is the
 * SSE bit of the EDX register returned by CPUID leaf ("page") 1, sub-leaf 0.
 */
#define EDX1_SSE            (1 << 25)   // sse, EDX, page 1, bit 25
#define EDX1_SSE2           (1 << 26)   // sse2, EDX, page 1, bit 26
// Note: ECX bit 9 of leaf 1 is SSSE3, not SSE3; SSE3 is bit 0.
#define ECX1_SSE3           (1 << 0)    // sse3, ECX, page 1, bit 0
#define ECX1_SSE4_1         (1 << 19)   // sse4.1, ECX, page 1, bit 19
#define ECX1_SSE4_2         (1 << 20)   // sse4.2, ECX, page 1, bit 20
#define ECX1_AVX            (1 << 28)   // avx, ECX, page 1, bit 28
#define EBX7_AVX2           (1 << 5)    // avx2, EBX, page 7, bit 5
#define ECX7_AVX512_VBMI    (1 << 1)    // avx512-vbmi, ECX, page 7, bit 1
| 35 | + |
| 36 | +void |
| 37 | +detect_cpu_simd_features(cpu_simd_flags *flags) |
| 38 | +{ |
| 39 | + if (flags->done) { |
| 40 | + return; |
| 41 | + } |
| 42 | + |
| 43 | + int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0; |
| 44 | + int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0; |
| 45 | +#if defined(__x86_64__) && defined(__GNUC__) |
| 46 | + __cpuid_count(1, 0, eax1, ebx1, ecx1, edx1); |
| 47 | + __cpuid_count(7, 0, eax7, ebx7, ecx7, edx7); |
| 48 | +#elif defined(_M_X64) |
| 49 | + int info1[4] = {0}; |
| 50 | + __cpuidex(info1, 1, 0); |
| 51 | + eax1 = info1[0]; |
| 52 | + ebx1 = info1[1]; |
| 53 | + ecx1 = info1[2]; |
| 54 | + edx1 = info1[3]; |
| 55 | + |
| 56 | + int info7[4] = {0}; |
| 57 | + __cpuidex(info7, 7, 0); |
| 58 | + eax7 = info7[0]; |
| 59 | + ebx7 = info7[1]; |
| 60 | + ecx7 = info7[2]; |
| 61 | + edx7 = info7[3]; |
| 62 | +#else |
| 63 | + // use (void) expressions to avoid warnings |
| 64 | + (void) eax1; (void) ebx1; (void) ecx1; (void) edx1; |
| 65 | + (void) eax7; (void) ebx7; (void) ecx7; (void) edx7; |
| 66 | +#endif |
| 67 | + |
| 68 | +#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS |
| 69 | + flags->sse = (edx1 & EDX1_SSE) != 0; |
| 70 | +#else |
| 71 | + flags->sse = false; |
| 72 | +#endif |
| 73 | +#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS |
| 74 | + flags->sse2 = (edx1 & EDX1_SSE2) != 0; |
| 75 | +#else |
| 76 | + flags->sse2 = false; |
| 77 | +#endif |
| 78 | +#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS |
| 79 | + flags->sse3 = (ecx1 & ECX1_SSE3) != 0; |
| 80 | + #else |
| 81 | +#endif |
| 82 | + flags->sse3 = false; |
| 83 | +#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS |
| 84 | + flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0; |
| 85 | +#else |
| 86 | + flags->sse41 = false; |
| 87 | +#endif |
| 88 | +#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS |
| 89 | + flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0; |
| 90 | +#else |
| 91 | + flags->sse42 = false; |
| 92 | +#endif |
| 93 | +#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS |
| 94 | + flags->avx = (ecx1 & ECX1_AVX) != 0; |
| 95 | +#else |
| 96 | + flags->avx = false; |
| 97 | +#endif |
| 98 | +#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS |
| 99 | + flags->avx2 = (ebx7 & EBX7_AVX2) != 0; |
| 100 | +#else |
| 101 | + flags->avx2 = false; |
| 102 | +#endif |
| 103 | +#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS |
| 104 | + flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0; |
| 105 | +#else |
| 106 | + flags->avx512vbmi = false; |
| 107 | +#endif |
| 108 | + |
| 109 | + flags->done = true; |
| 110 | +} |
0 commit comments