11/*
2+ * @author Bénédikt Tran
3+ *
24 * Interface for detecting the different CPUID flags in an opaque manner.
35 * See https://en.wikipedia.org/wiki/CPUID for details on the bit values.
46 *
@@ -22,182 +24,8 @@ extern "C" {
2224#endif
2325
2426#include "Python.h"
25-
26- /*
27- * The enumeration describes masks to apply on CPUID output registers.
28- *
29- * Member names are Py_CPUID_MASK_<REGISTER>_L<LEAF>[S<SUBLEAF>]_<FEATURE>,
30- * where <> (resp. []) denotes a required (resp. optional) group and:
31- *
32- * - REGISTER is EAX, EBX, ECX or EDX,
33- * - LEAF is the initial value of the EAX register (1 or 7),
34- * - SUBLEAF is the initial value of the ECX register (omitted if 0), and
35- * - FEATURE is a SIMD feature (with one or more specialized instructions).
36- *
37- * For maintainability, the flags are ordered by registers, leafs, subleafs,
38- * and bits. See https://en.wikipedia.org/wiki/CPUID for the values.
39- *
40- * Note 1: The LEAF is also called the 'page' or the 'level'.
41- * Note 2: The SUBLEAF is also referred to as the 'count'.
42- *
43- * The LEAF value should only be 1 or 7 as other values may have different
44- * meanings depending on the underlying architecture.
45- */
46- // fmt: off
/*
 * Single-bit masks for the CPUID output registers.
 *
 * The members listed between the "[python start generated code]" and
 * "[python end generated code]" markers are the output of the embedded
 * "[python input]" script: one member per (leaf, subleaf, register, feature)
 * entry of its `data` table, named by get_member_name() and valued `1 << bit`.
 *
 * NOTE(review): the end marker carries input/output checksums, so the
 * generated lines are presumably meant to be regenerated (the script itself
 * mentions Clinic) rather than edited by hand -- confirm before touching them.
 */
47- typedef enum py_cpuid_feature_mask {
48- /*[python input]
49- # {(LEAF, SUBLEAF, REGISTRY): {FEATURE: BIT}}
50- data = {
51- (1, 0, 'ECX'): {
52- 'SSE3': 0,
53- 'PCLMULQDQ': 1,
54- 'SSSE3': 9,
55- 'FMA': 12,
56- 'SSE4_1': 19,
57- 'SSE4_2': 20,
58- 'POPCNT': 23,
59- 'XSAVE': 26,
60- 'OSXSAVE': 27,
61- 'AVX': 28,
62- },
63- (1, 0, 'EDX'): {
64- 'CMOV': 15,
65- 'SSE': 25,
66- 'SSE2': 26,
67- },
68- (7, 0, 'EBX'): {
69- 'AVX2': 5,
70- 'AVX512_F': 16,
71- 'AVX512_DQ': 17,
72- 'AVX512_IFMA': 21,
73- 'AVX512_PF': 26,
74- 'AVX512_ER': 27,
75- 'AVX512_CD': 28,
76- 'AVX512_BW': 30,
77- 'AVX512_VL': 31,
78- },
79- (7, 0, 'ECX'): {
80- 'AVX512_VBMI': 1,
81- 'AVX512_VBMI2': 6,
82- 'AVX512_VNNI': 11,
83- 'AVX512_BITALG': 12,
84- 'AVX512_VPOPCNTDQ': 14,
85- },
86- (7, 0, 'EDX'): {
87- 'AVX512_4VNNIW': 2,
88- 'AVX512_4FMAPS': 3,
89- 'AVX512_VP2INTERSECT': 8,
90- },
91- (7, 1, 'EAX'): {
92- 'AVX_VNNI': 4,
93- 'AVX_IFMA': 23,
94- },
95- (7, 1, 'EDX'): {
96- 'AVX_VNNI_INT8': 4,
97- 'AVX_NE_CONVERT': 5,
98- 'AVX_VNNI_INT16': 10,
99- },
100- }
101-
102- def get_member_name(leaf, subleaf, registry, name):
103- node = f'L{leaf}S{subleaf}' if subleaf else f'L{leaf}'
104- return f'Py_CPUID_MASK_{registry}_{node}_{name}'
105-
106- def get_member_mask(bit):
107- val = format(1 << bit, '008x')
108- return f'= 0x{val},'
109-
110- # The enumeration is rendered as follows:
111- #
112- # <INDENT><MEMBER_NAME> <TAB>= 0x<MASK>, <TAB>// bit = BIT
113- # ^ ^ ^ ^ ^ ^ ^
114- #
115- # where ^ indicates a column that is a multiple of 4, <MASK> has
116- # exactly 8 characters and <BIT> has at most 2 characters.
117-
118- INDENT = ' ' * 4
119- # BUG(picnixz): Clinic does not like when '/' and '*' are put together.
120- COMMENT = '/' + '* '
121-
122- def next_block(w):
123- """Compute the smallest multiple of 4 strictly larger than *w*."""
124- return ((w + 3) & ~0x03) if (w % 4) else (w + 4)
125-
126- NAMESIZE = next_block(max(
127- len(get_member_name(*group, name))
128- for group, values in data.items()
129- for name in values
130- ))
131- MASKSIZE = 8 + next_block(len('= 0x,'))
132-
133- for group, values in data.items():
134- title = 'CPUID (LEAF={}, SUBLEAF={}) [{}]'.format(*group)
135- print(INDENT, *COMMENT, title, *COMMENT[::-1], sep='')
136- for name, bit in values.items():
137- assert name, f"invalid entry in {group}"
138- key = get_member_name(*group, name)
139- assert 0 <= bit < 32, f"invalid bit value for {name!r}"
140- val = get_member_mask(bit)
141-
142- member_name = key.ljust(NAMESIZE)
143- member_mask = val.ljust(MASKSIZE)
144-
145- print(INDENT, member_name, member_mask, f'// bit = {bit}', sep='')
146- [python start generated code]*/
147- /* CPUID (LEAF=1, SUBLEAF=0) [ECX] */
148- Py_CPUID_MASK_ECX_L1_SSE3 = 0x00000001 , // bit = 0
149- Py_CPUID_MASK_ECX_L1_PCLMULQDQ = 0x00000002 , // bit = 1
150- Py_CPUID_MASK_ECX_L1_SSSE3 = 0x00000200 , // bit = 9
151- Py_CPUID_MASK_ECX_L1_FMA = 0x00001000 , // bit = 12
152- Py_CPUID_MASK_ECX_L1_SSE4_1 = 0x00080000 , // bit = 19
153- Py_CPUID_MASK_ECX_L1_SSE4_2 = 0x00100000 , // bit = 20
154- Py_CPUID_MASK_ECX_L1_POPCNT = 0x00800000 , // bit = 23
155- Py_CPUID_MASK_ECX_L1_XSAVE = 0x04000000 , // bit = 26
156- Py_CPUID_MASK_ECX_L1_OSXSAVE = 0x08000000 , // bit = 27
157- Py_CPUID_MASK_ECX_L1_AVX = 0x10000000 , // bit = 28
158- /* CPUID (LEAF=1, SUBLEAF=0) [EDX] */
159- Py_CPUID_MASK_EDX_L1_CMOV = 0x00008000 , // bit = 15
160- Py_CPUID_MASK_EDX_L1_SSE = 0x02000000 , // bit = 25
161- Py_CPUID_MASK_EDX_L1_SSE2 = 0x04000000 , // bit = 26
162- /* CPUID (LEAF=7, SUBLEAF=0) [EBX] */
163- Py_CPUID_MASK_EBX_L7_AVX2 = 0x00000020 , // bit = 5
164- Py_CPUID_MASK_EBX_L7_AVX512_F = 0x00010000 , // bit = 16
165- Py_CPUID_MASK_EBX_L7_AVX512_DQ = 0x00020000 , // bit = 17
166- Py_CPUID_MASK_EBX_L7_AVX512_IFMA = 0x00200000 , // bit = 21
167- Py_CPUID_MASK_EBX_L7_AVX512_PF = 0x04000000 , // bit = 26
168- Py_CPUID_MASK_EBX_L7_AVX512_ER = 0x08000000 , // bit = 27
169- Py_CPUID_MASK_EBX_L7_AVX512_CD = 0x10000000 , // bit = 28
170- Py_CPUID_MASK_EBX_L7_AVX512_BW = 0x40000000 , // bit = 30
171- Py_CPUID_MASK_EBX_L7_AVX512_VL = 0x80000000 , // bit = 31
172- /* CPUID (LEAF=7, SUBLEAF=0) [ECX] */
173- Py_CPUID_MASK_ECX_L7_AVX512_VBMI = 0x00000002 , // bit = 1
174- Py_CPUID_MASK_ECX_L7_AVX512_VBMI2 = 0x00000040 , // bit = 6
175- Py_CPUID_MASK_ECX_L7_AVX512_VNNI = 0x00000800 , // bit = 11
176- Py_CPUID_MASK_ECX_L7_AVX512_BITALG = 0x00001000 , // bit = 12
177- Py_CPUID_MASK_ECX_L7_AVX512_VPOPCNTDQ = 0x00004000 , // bit = 14
178- /* CPUID (LEAF=7, SUBLEAF=0) [EDX] */
179- Py_CPUID_MASK_EDX_L7_AVX512_4VNNIW = 0x00000004 , // bit = 2
180- Py_CPUID_MASK_EDX_L7_AVX512_4FMAPS = 0x00000008 , // bit = 3
181- Py_CPUID_MASK_EDX_L7_AVX512_VP2INTERSECT = 0x00000100 , // bit = 8
182- /* CPUID (LEAF=7, SUBLEAF=1) [EAX] */
183- Py_CPUID_MASK_EAX_L7S1_AVX_VNNI = 0x00000010 , // bit = 4
184- Py_CPUID_MASK_EAX_L7S1_AVX_IFMA = 0x00800000 , // bit = 23
185- /* CPUID (LEAF=7, SUBLEAF=1) [EDX] */
186- Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT8 = 0x00000010 , // bit = 4
187- Py_CPUID_MASK_EDX_L7S1_AVX_NE_CONVERT = 0x00000020 , // bit = 5
188- Py_CPUID_MASK_EDX_L7S1_AVX_VNNI_INT16 = 0x00000400 , // bit = 10
189- /*[python end generated code: output=e53c5376296af250 input=4102387db46d5787]*/
190- } py_cpuid_feature_mask ;
191- // fmt: on
192-
193- /*
 * XSAVE state components (XCR0 control register).
 *
 * Each member is a single-bit mask; the trailing comment on each line
 * gives the index of the bit that is set in that mask.
 */
194- typedef enum py_xsave_feature_mask {
195- Py_XSAVE_MASK_XCR0_SSE = 0x00000002 , // bit = 1
196- Py_XSAVE_MASK_XCR0_AVX = 0x00000004 , // bit = 2
197- Py_XSAVE_MASK_XCR0_AVX512_OPMASK = 0x00000020 , // bit = 5
198- Py_XSAVE_MASK_XCR0_AVX512_ZMM_HI256 = 0x00000040 , // bit = 6
199- Py_XSAVE_MASK_XCR0_AVX512_HI16_ZMM = 0x00000080 , // bit = 7
200- } py_xsave_feature_mask ;
27+ #include "pycore_cpuinfo_cpuid_features.h"
28+ #include "pycore_cpuinfo_xsave_features.h"
20129
20230typedef struct py_cpuid_features {
20331 uint32_t maxleaf ;
0 commit comments