@@ -33,65 +33,67 @@ namespace xsimd
3333 {
3434 struct supported_arch
3535 {
36- unsigned sse2 : 1 ;
37- unsigned sse3 : 1 ;
38- unsigned ssse3 : 1 ;
39- unsigned sse4_1 : 1 ;
40- unsigned sse4_2 : 1 ;
41- unsigned sse4a : 1 ;
42- unsigned fma3_sse : 1 ;
43- unsigned fma4 : 1 ;
44- unsigned xop : 1 ;
45- unsigned avx : 1 ;
46- unsigned fma3_avx : 1 ;
47- unsigned avx2 : 1 ;
48- unsigned avxvnni : 1 ;
49- unsigned fma3_avx2 : 1 ;
50- unsigned avx512f : 1 ;
51- unsigned avx512cd : 1 ;
52- unsigned avx512dq : 1 ;
53- unsigned avx512bw : 1 ;
54- unsigned avx512er : 1 ;
55- unsigned avx512pf : 1 ;
56- unsigned avx512ifma : 1 ;
57- unsigned avx512vbmi : 1 ;
58- unsigned avx512vnni_bw : 1 ;
59- unsigned avx512vnni_vbmi : 1 ;
60- unsigned neon : 1 ;
61- unsigned neon64 : 1 ;
62- unsigned sve : 1 ;
63- unsigned rvv : 1 ;
64-
65- // version number of the best arch available
66- unsigned best;
36+
37+ #define ARCH_FIELD_EX (arch, field_name ) \
38+ unsigned field_name; \
39+ inline bool has (::xsimd::arch) const { return this ->field_name ; }
40+ #define ARCH_FIELD (name ) ARCH_FIELD_EX(name, name)
41+
42+ ARCH_FIELD (sse2)
43+ ARCH_FIELD (sse3)
44+
45+ ARCH_FIELD (ssse3)
46+ ARCH_FIELD (sse4_1)
47+ ARCH_FIELD (sse4_2)
48+ // ARCH_FIELD(sse4a)
49+ ARCH_FIELD_EX (fma3<::xsimd::sse4_2>, fma3_sse42)
50+ ARCH_FIELD (fma4)
51+ // ARCH_FIELD(xop)
52+ ARCH_FIELD (avx)
53+ ARCH_FIELD_EX (fma3<::xsimd::avx>, fma3_avx)
54+ ARCH_FIELD (avx2)
55+ ARCH_FIELD (avxvnni)
56+ ARCH_FIELD_EX (fma3<::xsimd::avx2>, fma3_avx2)
57+ ARCH_FIELD (avx512f)
58+ ARCH_FIELD (avx512cd)
59+ ARCH_FIELD (avx512dq)
60+ ARCH_FIELD (avx512bw)
61+ ARCH_FIELD (avx512er)
62+ ARCH_FIELD (avx512pf)
63+ ARCH_FIELD (avx512ifma)
64+ ARCH_FIELD (avx512vbmi)
65+ ARCH_FIELD_EX (avx512vnni<::xsimd::avx512bw>, avx512vnni_bw)
66+ ARCH_FIELD_EX (avx512vnni<::xsimd::avx512vbmi>, avx512vnni_vbmi)
67+ ARCH_FIELD (neon)
68+ ARCH_FIELD (neon64)
69+ ARCH_FIELD (sve)
70+ ARCH_FIELD (rvv)
71+ ARCH_FIELD (wasm)
72+
73+ #undef ARCH_FIELD
6774
6875 inline supported_arch () noexcept
6976 {
7077 memset (this , 0 , sizeof (supported_arch));
7178
79+ #if XSIMD_WITH_WASM
80+ wasm = 1 ;
81+ #endif
82+
7283#if defined(__aarch64__) || defined(_M_ARM64)
7384 neon = 1 ;
7485 neon64 = 1 ;
75- best = neon64::version ();
7686#elif defined(__ARM_NEON) || defined(_M_ARM)
7787
7888#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
7989 neon = bool (getauxval (AT_HWCAP) & HWCAP_NEON);
80- #else
81- // that's very conservative :-/
82- neon = 0 ;
8390#endif
84- neon64 = 0 ;
85- best = neon::version () * neon;
8691
8792#elif defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS > 0
8893
8994#if defined(__linux__) && (!defined(__ANDROID_API__) || __ANDROID_API__ >= 18)
9095 sve = bool (getauxval (AT_HWCAP) & HWCAP_SVE);
91- #else
92- sve = 0 ;
9396#endif
94- best = sve::version () * sve;
9597
9698#elif defined(__riscv_vector) && defined(__riscv_v_fixed_vlen) && __riscv_v_fixed_vlen > 0
9799
@@ -100,11 +102,8 @@ namespace xsimd
100102#define HWCAP_V (1 << (' V' - ' A' ))
101103#endif
102104 rvv = bool (getauxval (AT_HWCAP) & HWCAP_V);
103- #else
104- rvv = 0 ;
105105#endif
106106
107- best = ::xsimd::rvv::version () * rvv;
108107#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
109108 auto get_cpuid = [](int reg[4 ], int level, int count = 0 ) noexcept
110109 {
@@ -122,14 +121,12 @@ namespace xsimd
122121 __asm__ (" xchg{l}\t {%%}ebx, %1\n\t "
123122 " cpuid\n\t "
124123 " xchg{l}\t {%%}ebx, %1\n\t "
125- : " =a" (reg[0 ]), " =r" (reg[1 ]), " =c" (reg[2 ]),
126- " =d" (reg[3 ])
124+ : " =a" (reg[0 ]), " =r" (reg[1 ]), " =c" (reg[2 ]), " =d" (reg[3 ])
127125 : " 0" (level), " 2" (count));
128126
129127#else
130128 __asm__ (" cpuid\n\t "
131- : " =a" (reg[0 ]), " =b" (reg[1 ]), " =c" (reg[2 ]),
132- " =d" (reg[3 ])
129+ : " =a" (reg[0 ]), " =b" (reg[1 ]), " =c" (reg[2 ]), " =d" (reg[3 ])
133130 : " 0" (level), " 2" (count));
134131#endif
135132
@@ -143,87 +140,47 @@ namespace xsimd
143140 get_cpuid (regs1, 0x1 );
144141
145142 sse2 = regs1[3 ] >> 26 & 1 ;
146- best = std::max (best, sse2::version () * sse2);
147-
148143 sse3 = regs1[2 ] >> 0 & 1 ;
149- best = std::max (best, sse3::version () * sse3);
150-
151144 ssse3 = regs1[2 ] >> 9 & 1 ;
152- best = std::max (best, ssse3::version () * ssse3);
153-
154145 sse4_1 = regs1[2 ] >> 19 & 1 ;
155- best = std::max (best, sse4_1::version () * sse4_1);
156-
157146 sse4_2 = regs1[2 ] >> 20 & 1 ;
158- best = std::max (best, sse4_2::version () * sse4_2);
159-
160- fma3_sse = regs1[2 ] >> 12 & 1 ;
161- if (sse4_2)
162- best = std::max (best, fma3<xsimd::sse4_2>::version () * fma3_sse);
147+ fma3_sse42 = regs1[2 ] >> 12 & 1 ;
163148
164149 avx = regs1[2 ] >> 28 & 1 ;
165- best = std::max (best, avx::version () * avx);
166-
167- fma3_avx = avx && fma3_sse;
168- best = std::max (best, fma3<xsimd::avx>::version () * fma3_avx);
150+ fma3_avx = avx && fma3_sse42;
169151
170152 int regs8[4 ];
171153 get_cpuid (regs8, 0x80000001 );
172154 fma4 = regs8[2 ] >> 16 & 1 ;
173- best = std::max (best, fma4::version () * fma4);
174155
175156 // sse4a = regs[2] >> 6 & 1;
176- // best = std::max(best, XSIMD_X86_AMD_SSE4A_VERSION * sse4a);
177157
178158 // xop = regs[2] >> 11 & 1;
179- // best = std::max(best, XSIMD_X86_AMD_XOP_VERSION * xop);
180159
181160 int regs7[4 ];
182161 get_cpuid (regs7, 0x7 );
183162 avx2 = regs7[1 ] >> 5 & 1 ;
184- best = std::max (best, avx2::version () * avx2);
185163
186164 int regs7a[4 ];
187165 get_cpuid (regs7a, 0x7 , 0x1 );
188166 avxvnni = regs7a[0 ] >> 4 & 1 ;
189- best = std::max (best, avxvnni::version () * avxvnni * avx2);
190167
191- fma3_avx2 = avx2 && fma3_sse;
192- best = std::max (best, fma3<xsimd::avx2>::version () * fma3_avx2);
168+ fma3_avx2 = avx2 && fma3_sse42;
193169
194170 avx512f = regs7[1 ] >> 16 & 1 ;
195- best = std::max (best, avx512f::version () * avx512f);
196-
197171 avx512cd = regs7[1 ] >> 28 & 1 ;
198- best = std::max (best, avx512cd::version () * avx512cd * avx512f);
199-
200172 avx512dq = regs7[1 ] >> 17 & 1 ;
201- best = std::max (best, avx512dq::version () * avx512dq * avx512cd * avx512f);
202-
203173 avx512bw = regs7[1 ] >> 30 & 1 ;
204- best = std::max (best, avx512bw::version () * avx512bw * avx512dq * avx512cd * avx512f);
205-
206174 avx512er = regs7[1 ] >> 27 & 1 ;
207- best = std::max (best, avx512er::version () * avx512er * avx512cd * avx512f);
208-
209175 avx512pf = regs7[1 ] >> 26 & 1 ;
210- best = std::max (best, avx512pf::version () * avx512pf * avx512er * avx512cd * avx512f);
211-
212176 avx512ifma = regs7[1 ] >> 21 & 1 ;
213- best = std::max (best, avx512ifma::version () * avx512ifma * avx512bw * avx512dq * avx512cd * avx512f);
214-
215177 avx512vbmi = regs7[2 ] >> 1 & 1 ;
216- best = std::max (best, avx512vbmi::version () * avx512vbmi * avx512ifma * avx512bw * avx512dq * avx512cd * avx512f);
217-
218178 avx512vnni_bw = regs7[2 ] >> 11 & 1 ;
219- best = std::max (best, avx512vnni<xsimd::avx512bw>::version () * avx512vnni_bw * avx512bw * avx512dq * avx512cd * avx512f);
220-
221179 avx512vnni_vbmi = avx512vbmi && avx512vnni_bw;
222- best = std::max (best, avx512vnni<xsimd::avx512vbmi>::version () * avx512vnni_vbmi);
223180#endif
224181 }
225182 };
226- }
183+ } // namespace detail
227184
228185 inline detail::supported_arch available_architectures () noexcept
229186 {
0 commit comments