Skip to content

Commit 9a6ccb5

Browse files
committed
support simple SIMD detection
1 parent bd393ae commit 9a6ccb5

File tree

8 files changed

+595
-0
lines changed

8 files changed

+595
-0
lines changed

Include/internal/pycore_cpuinfo.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#ifndef Py_INTERNAL_CPUINFO_H
2+
#define Py_INTERNAL_CPUINFO_H
3+
4+
#ifdef __cplusplus
5+
extern "C" {
6+
#endif
7+
8+
#ifndef Py_BUILD_CORE
9+
# error "this header requires Py_BUILD_CORE define"
10+
#endif
11+
12+
#include <stdbool.h>
13+
14+
/*
 * Result record filled in by detect_cpu_simd_features(): one boolean per
 * probed SIMD instruction set, plus a marker telling whether detection
 * has already been performed on this record.
 */
typedef struct {
15+
bool sse, sse2, sse3, sse41, sse42, avx, avx2, avx512vbmi;
16+
// Set to true by detect_cpu_simd_features() after all flags above have
// been written; that function returns immediately when it is already true.
bool done;
17+
} cpu_simd_flags;
18+
19+
/*
 * Populate every feature field of *flags and then set flags->done.
 *
 * flags->done is read before anything is written, so the caller must
 * initialize it (e.g. zero-initialize the structure) before the first call.
 * When flags->done is already true the function returns without touching
 * the structure.
 */
extern void
20+
detect_cpu_simd_features(cpu_simd_flags *flags);
21+
22+
#ifdef __cplusplus
23+
}
24+
#endif
25+
26+
#endif /* !Py_INTERNAL_CPUINFO_H */

Makefile.pre.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ PYTHON_OBJS= \
434434
Python/context.o \
435435
Python/critical_section.o \
436436
Python/crossinterp.o \
437+
Python/cpuinfo.o \
437438
Python/dynamic_annotations.o \
438439
Python/errors.o \
439440
Python/flowgraph.o \
@@ -1191,6 +1192,7 @@ PYTHON_HEADERS= \
11911192
$(srcdir)/Include/internal/pycore_complexobject.h \
11921193
$(srcdir)/Include/internal/pycore_condvar.h \
11931194
$(srcdir)/Include/internal/pycore_context.h \
1195+
$(srcdir)/Include/internal/pycore_cpuinfo.h \
11941196
$(srcdir)/Include/internal/pycore_critical_section.h \
11951197
$(srcdir)/Include/internal/pycore_crossinterp.h \
11961198
$(srcdir)/Include/internal/pycore_descrobject.h \

PCbuild/pythoncore.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@
225225
<ClInclude Include="..\Include\internal\pycore_complexobject.h" />
226226
<ClInclude Include="..\Include\internal\pycore_condvar.h" />
227227
<ClInclude Include="..\Include\internal\pycore_context.h" />
228+
<ClInclude Include="..\Include\internal\pycore_cpuinfo.h" />
228229
<ClInclude Include="..\Include\internal\pycore_critical_section.h" />
229230
<ClInclude Include="..\Include\internal\pycore_crossinterp.h" />
230231
<ClInclude Include="..\Include\internal\pycore_descrobject.h" />
@@ -584,6 +585,7 @@
584585
<ClCompile Include="..\Python\codegen.c" />
585586
<ClCompile Include="..\Python\compile.c" />
586587
<ClCompile Include="..\Python\context.c" />
588+
<ClCompile Include="..\Python\cpuinfo.c" />
587589
<ClCompile Include="..\Python\critical_section.c" />
588590
<ClCompile Include="..\Python\crossinterp.c" />
589591
<ClCompile Include="..\Python\dynamic_annotations.c" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,9 @@
597597
<ClInclude Include="..\Include\internal\pycore_context.h">
598598
<Filter>Include\internal</Filter>
599599
</ClInclude>
600+
<ClInclude Include="..\Include\internal\pycore_cpuinfo.h">
601+
<Filter>Include\internal</Filter>
602+
</ClInclude>
600603
<ClInclude Include="..\Include\internal\pycore_critical_section.h">
601604
<Filter>Include\internal</Filter>
602605
</ClInclude>
@@ -1304,6 +1307,9 @@
13041307
<ClCompile Include="..\Python\compile.c">
13051308
<Filter>Python</Filter>
13061309
</ClCompile>
1310+
<ClCompile Include="..\Python\cpuinfo.c">
1311+
<Filter>Source Files</Filter>
1312+
</ClCompile>
13071313
<ClCompile Include="..\Python\critical_section.c">
13081314
<Filter>Python</Filter>
13091315
</ClCompile>

Python/cpuinfo.c

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Naive CPU SIMD features detection.
3+
*
4+
* See Modules/blake2module.c.
5+
*/
6+
7+
#include "Python.h"
8+
#include "pycore_cpuinfo.h"
9+
10+
#include <stdbool.h>
11+
12+
#if defined(__x86_64__) && defined(__GNUC__)
13+
#include <cpuid.h>
14+
#elif defined(_M_X64)
15+
#include <intrin.h>
16+
#endif
17+
18+
// AVX2 cannot be compiled on macOS ARM64 (yet it can be compiled on x86_64).
19+
// However, since autoconf incorrectly assumes so when compiling a universal2
20+
// binary, we disable all AVX-related instructions.
21+
#if defined(__APPLE__) && defined(__arm64__)
22+
# undef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
23+
# undef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
24+
# undef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
25+
#endif
26+
27+
/*
 * Bit masks for the CPUID feature bits consulted by
 * detect_cpu_simd_features().  The macro prefix names the output register
 * and the CPUID leaf ("page") the bit is read from, e.g. ECX7_* is a bit
 * of ECX after executing CPUID with EAX=7, ECX=0.
 */
#define EDX1_SSE            (1 << 25)   // sse, EDX, page 1, bit 25
#define EDX1_SSE2           (1 << 26)   // sse2, EDX, page 1, bit 26
// SSE3 is bit 0 of ECX; bit 9 of the same register reports SSSE3 instead.
#define ECX1_SSE3           (1 << 0)    // sse3, ECX, page 1, bit 0
#define ECX1_SSE4_1         (1 << 19)   // sse4.1, ECX, page 1, bit 19
#define ECX1_SSE4_2         (1 << 20)   // sse4.2, ECX, page 1, bit 20
#define ECX1_AVX            (1 << 28)   // avx, ECX, page 1, bit 28
#define EBX7_AVX2           (1 << 5)    // avx2, EBX, page 7, bit 5
#define ECX7_AVX512_VBMI    (1 << 1)    // avx512-vbmi, ECX, page 7, bit 1
35+
36+
void
37+
detect_cpu_simd_features(cpu_simd_flags *flags)
38+
{
39+
if (flags->done) {
40+
return;
41+
}
42+
43+
int eax1 = 0, ebx1 = 0, ecx1 = 0, edx1 = 0;
44+
int eax7 = 0, ebx7 = 0, ecx7 = 0, edx7 = 0;
45+
#if defined(__x86_64__) && defined(__GNUC__)
46+
__cpuid_count(1, 0, eax1, ebx1, ecx1, edx1);
47+
__cpuid_count(7, 0, eax7, ebx7, ecx7, edx7);
48+
#elif defined(_M_X64)
49+
int info1[4] = {0};
50+
__cpuidex(info1, 1, 0);
51+
eax1 = info1[0];
52+
ebx1 = info1[1];
53+
ecx1 = info1[2];
54+
edx1 = info1[3];
55+
56+
int info7[4] = {0};
57+
__cpuidex(info7, 7, 0);
58+
eax7 = info7[0];
59+
ebx7 = info7[1];
60+
ecx7 = info7[2];
61+
edx7 = info7[3];
62+
#else
63+
// use (void) expressions to avoid warnings
64+
(void) eax1; (void) ebx1; (void) ecx1; (void) edx1;
65+
(void) eax7; (void) ebx7; (void) ecx7; (void) edx7;
66+
#endif
67+
68+
#ifdef CAN_COMPILE_SIMD_SSE_INSTRUCTIONS
69+
flags->sse = (edx1 & EDX1_SSE) != 0;
70+
#else
71+
flags->sse = false;
72+
#endif
73+
#ifdef CAN_COMPILE_SIMD_SSE2_INSTRUCTIONS
74+
flags->sse2 = (edx1 & EDX1_SSE2) != 0;
75+
#else
76+
flags->sse2 = false;
77+
#endif
78+
#ifdef CAN_COMPILE_SIMD_SSE3_INSTRUCTIONS
79+
flags->sse3 = (ecx1 & ECX1_SSE3) != 0;
80+
#else
81+
#endif
82+
flags->sse3 = false;
83+
#ifdef CAN_COMPILE_SIMD_SSE4_1_INSTRUCTIONS
84+
flags->sse41 = (ecx1 & ECX1_SSE4_1) != 0;
85+
#else
86+
flags->sse41 = false;
87+
#endif
88+
#ifdef CAN_COMPILE_SIMD_SSE4_2_INSTRUCTIONS
89+
flags->sse42 = (ecx1 & ECX1_SSE4_2) != 0;
90+
#else
91+
flags->sse42 = false;
92+
#endif
93+
#ifdef CAN_COMPILE_SIMD_AVX_INSTRUCTIONS
94+
flags->avx = (ecx1 & ECX1_AVX) != 0;
95+
#else
96+
flags->avx = false;
97+
#endif
98+
#ifdef CAN_COMPILE_SIMD_AVX2_INSTRUCTIONS
99+
flags->avx2 = (ebx7 & EBX7_AVX2) != 0;
100+
#else
101+
flags->avx2 = false;
102+
#endif
103+
#ifdef CAN_COMPILE_SIMD_AVX512_VBMI_INSTRUCTIONS
104+
flags->avx512vbmi = (ecx7 & ECX7_AVX512_VBMI) != 0;
105+
#else
106+
flags->avx512vbmi = false;
107+
#endif
108+
109+
flags->done = true;
110+
}

0 commit comments

Comments
 (0)