Skip to content

Commit 1532653

Browse files
ethomag authored and mr-c committed
x86: Added some bmi1 and bmi2 intrinsics
1 parent 99fef32 commit 1532653

File tree

6 files changed

+601
-0
lines changed

6 files changed

+601
-0
lines changed

meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,7 @@ if not meson.is_subproject()
501501
'simde/x86/avx512.h',
502502
'simde/x86/avx2.h',
503503
'simde/x86/avx.h',
504+
'simde/x86/bmi.h',
504505
'simde/x86/clmul.h',
505506
'simde/x86/fma.h',
506507
'simde/x86/f16c.h',

simde/simde-arch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,15 @@
166166
#if defined(__ARM_FEATURE_SVE) && __ARM_FEATURE_SVE
167167
# define SIMDE_ARCH_ARM_SVE
168168
#endif
169+
#if defined(__ARM_FEATURE_SVE2) && __ARM_FEATURE_SVE2
170+
# define SIMDE_ARCH_ARM_SVE2
171+
#endif
169172
#if defined(__ARM_FEATURE_QRDMX) && __ARM_FEATURE_QRDMX
170173
# define SIMDE_ARCH_ARM_QRDMX
171174
#endif
175+
#if defined(__ARM_FEATURE_SVE2_BITPERM) && __ARM_FEATURE_SVE2_BITPERM
176+
# define SIMDE_ARCH_ARM_SVE2_BITPERM
177+
#endif
172178

173179
/* Blackfin
174180
<https://en.wikipedia.org/wiki/Blackfin> */
@@ -387,6 +393,12 @@
387393
# if defined(__AES__)
388394
# define SIMDE_ARCH_X86_AES 1
389395
# endif
396+
# if defined(__BMI__)
397+
# define SIMDE_ARCH_X86_BMI1 1
398+
# endif
399+
# if defined(__BMI2__)
400+
# define SIMDE_ARCH_X86_BMI2 1
401+
# endif
390402
#endif
391403

392404
/* Itanium

simde/simde-features.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@
299299
#endif
300300
#endif
301301

302+
#if !defined(SIMDE_X86_BMI1_NATIVE) && !defined(SIMDE_X86_BMI1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
303+
#if defined(SIMDE_ARCH_X86_BMI1)
304+
#define SIMDE_X86_BMI1_NATIVE
305+
#endif
306+
#endif
307+
308+
#if !defined(SIMDE_X86_BMI2_NATIVE) && !defined(SIMDE_X86_BMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
309+
#if defined(SIMDE_ARCH_X86_BMI2)
310+
#define SIMDE_X86_BMI2_NATIVE
311+
#endif
312+
#endif
313+
302314
#if defined(HEDLEY_MSVC_VERSION)
303315
#pragma warning(push)
304316
#pragma warning(disable:4799)
@@ -379,6 +391,13 @@
379391
#endif
380392
#endif
381393

394+
#if !defined(SIMDE_ARM_SVE2_NATIVE) && !defined(SIMDE_ARM_SVE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
395+
#if defined(SIMDE_ARCH_ARM_SVE2)
396+
#define SIMDE_ARM_SVE2_NATIVE
397+
#include <arm_sve.h>
398+
#endif
399+
#endif
400+
382401
#if !defined(SIMDE_RISCV_V_NATIVE) && !defined(SIMDE_RISCV_V_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
383402
#if defined(SIMDE_ARCH_RISCV_V) && defined(__riscv_v_fixed_vlen)
384403
#define SIMDE_RISCV_V_NATIVE
@@ -680,6 +699,12 @@
680699
#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
681700
#define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
682701
#endif
702+
#if !defined(SIMDE_X86_BMI1_NATIVE)
703+
#define SIMDE_X86_BMI1_ENABLE_NATIVE_ALIASES
704+
#endif
705+
#if !defined(SIMDE_X86_BMI2_NATIVE)
706+
#define SIMDE_X86_BMI2_ENABLE_NATIVE_ALIASES
707+
#endif
683708
#if !defined(SIMDE_X86_F16C_NATIVE)
684709
#define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES
685710
#endif

simde/x86/bmi.h

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
/* SPDX-License-Identifier: MIT
2+
*
3+
* Permission is hereby granted, free of charge, to any person
4+
* obtaining a copy of this software and associated documentation
5+
* files (the "Software"), to deal in the Software without
6+
* restriction, including without limitation the rights to use, copy,
7+
* modify, merge, publish, distribute, sublicense, and/or sell copies
8+
* of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be
12+
* included in all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
* SOFTWARE.
22+
*
23+
* Copyright:
24+
* 2025 Thomas Magnusson <herr.thomas.magnusson@gmail.com>
25+
*/
26+
27+
#if !defined(SIMDE_X86_BMI_H)
28+
#define SIMDE_X86_BMI_H
29+
30+
/* Parallel bit extract (BMI2 PEXT), 32-bit.
 *
 * Collects the bits of `a` located at the positions where `mask` has a
 * 1 bit and packs them, lowest position first, into the low-order bits
 * of the result. All remaining result bits are zero.
 *
 * NOTE(review): SIMDE_FUNCTION_ATTRIBUTES and the fixed-width integer
 * types are not declared by this header itself; presumably the
 * includer provides them -- confirm. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_pext_u32(uint32_t a, uint32_t mask)
{
  #if defined(SIMDE_ARCH_ARM_SVE2_BITPERM) && defined(SIMDE_ARM_SVE2_NATIVE)
    /* Broadcast the scalar, apply the SVE2 bit-extract to the vector,
     * then read one lane back out. */
    const svuint32_t lanes = svdup_u32(a);
    const svuint32_t extracted = svbext_n_u32(lanes, mask);
    return svlastb_u32(svptrue_b32(), extracted);
  #else
    uint32_t packed = 0;
    int out_pos = 0; /* next free bit position in the result */
    uint32_t remaining = mask;

    /* Walk the mask and the source in lock-step, one bit at a time;
     * stop as soon as no selected positions are left. */
    while (remaining != 0) {
      if ((remaining & 1) != 0) {
        packed |= (a & 1) << out_pos;
        out_pos += 1;
      }
      remaining >>= 1;
      a >>= 1;
    }
    return packed;
  #endif
}
/* With native BMI2, calls go straight to the compiler intrinsic. */
#if defined(SIMDE_X86_BMI2_NATIVE)
  #define simde_pext_u32(a, mask) _pext_u32(a, mask)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI2_ENABLE_NATIVE_ALIASES)
  #undef _pext_u32
  #define _pext_u32(a, mask) simde_pext_u32(a, mask)
#endif
56+
57+
/* Parallel bit extract (BMI2 PEXT), 64-bit.
 *
 * Collects the bits of `a` located at the positions where `mask` has a
 * 1 bit and packs them, lowest position first, into the low-order bits
 * of the result. All remaining result bits are zero. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_pext_u64(uint64_t a, uint64_t mask)
{
  #if defined(SIMDE_ARCH_ARM_SVE2_BITPERM) && defined(SIMDE_ARM_SVE2_NATIVE)
    /* Broadcast the scalar, apply the SVE2 bit-extract to the vector,
     * then read one lane back out. */
    svuint64_t va = svdup_u64(a);
    svuint64_t r = svbext_n_u64(va, mask);
    return svlastb_u64(svptrue_b64(), r);
  #else
    uint64_t r = 0;
    int k = 0; /* next free bit position in the result */

    /* Walk mask and source together; a selected source bit is appended
     * to the result, an unselected one is dropped. */
    for (uint64_t m = mask; m != 0; m >>= 1, a >>= 1) {
      if ((m & 1) != 0) {
        r |= (a & 1) << k;
        k++;
      }
    }
    return r;
  #endif
}
/* The 64-bit intrinsic is only declared by compilers when targeting
 * x86-64, so the native override must not be used on 32-bit x86 even
 * when BMI2 itself is enabled; the portable function is used there. */
#if defined(SIMDE_X86_BMI2_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #define simde_pext_u64(a, mask) _pext_u64(a, mask)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI2_ENABLE_NATIVE_ALIASES)
  #undef _pext_u64
  #define _pext_u64(a, mask) simde_pext_u64(a, mask)
#endif
83+
84+
/* Parallel bit deposit (BMI2 PDEP), 32-bit.
 *
 * Takes the low-order bits of `a`, lowest first, and scatters them to
 * the positions where `mask` has a 1 bit. Result bits at positions
 * where `mask` is 0 are zero. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_pdep_u32(uint32_t a, uint32_t mask)
{
  #if defined(SIMDE_ARCH_ARM_SVE2_BITPERM) && defined(SIMDE_ARM_SVE2_NATIVE)
    /* Broadcast the scalar, apply the SVE2 bit-deposit to the vector,
     * then read one lane back out. */
    const svuint32_t lanes = svdup_u32(a);
    const svuint32_t deposited = svbdep_n_u32(lanes, mask);
    return svlastb_u32(svptrue_b32(), deposited);
  #else
    uint32_t scattered = 0;
    int dest_pos = 0; /* bit position currently examined in the mask */
    uint32_t remaining = mask;

    /* Step through the mask one bit at a time; a source bit is only
     * consumed when the current mask position is selected. */
    while (remaining != 0) {
      if ((remaining & 1) != 0) {
        scattered |= (a & 1) << dest_pos;
        a >>= 1;
      }
      remaining >>= 1;
      dest_pos += 1;
    }
    return scattered;
  #endif
}
/* With native BMI2, calls go straight to the compiler intrinsic. */
#if defined(SIMDE_X86_BMI2_NATIVE)
  #define simde_pdep_u32(a, mask) _pdep_u32(a, mask)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI2_ENABLE_NATIVE_ALIASES)
  #undef _pdep_u32
  #define _pdep_u32(a, mask) simde_pdep_u32(a, mask)
#endif
111+
112+
/* Parallel bit deposit (BMI2 PDEP), 64-bit.
 *
 * Takes the low-order bits of `a`, lowest first, and scatters them to
 * the positions where `mask` has a 1 bit. Result bits at positions
 * where `mask` is 0 are zero. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_pdep_u64(uint64_t a, uint64_t mask)
{
  #if defined(SIMDE_ARCH_ARM_SVE2_BITPERM) && defined(SIMDE_ARM_SVE2_NATIVE)
    /* Broadcast the scalar, apply the SVE2 bit-deposit to the vector,
     * then read one lane back out. */
    svuint64_t va = svdup_u64(a);
    svuint64_t r = svbdep_n_u64(va, mask);
    return svlastb_u64(svptrue_b64(), r);
  #else
    uint64_t r = 0;
    int k = 0; /* bit position currently examined in the mask */

    /* A source bit is consumed only when the current mask position is
     * selected; unselected positions just advance k. */
    for (uint64_t m = mask; m != 0; m >>= 1, k++) {
      if ((m & 1) != 0) {
        r |= (a & 1) << k;
        a >>= 1;
      }
    }
    return r;
  #endif
}
/* The 64-bit intrinsic is only declared by compilers when targeting
 * x86-64, so the native override must not be used on 32-bit x86 even
 * when BMI2 itself is enabled; the portable function is used there. */
#if defined(SIMDE_X86_BMI2_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #define simde_pdep_u64(a, mask) _pdep_u64(a, mask)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI2_ENABLE_NATIVE_ALIASES)
  #undef _pdep_u64
  #define _pdep_u64(a, mask) simde_pdep_u64(a, mask)
#endif
139+
140+
/* Bit field extract (BMI1 BEXTR), 32-bit.
 *
 * Returns `len` contiguous bits of `a` starting at bit `start`,
 * right-aligned and zero-extended. A field that runs past bit 31 is
 * truncated at bit 31; a zero length or a start of 32 or more yields 0.
 *
 * Only the low 8 bits of `start` and `len` are significant, matching
 * the instruction's control encoding, so the portable path and the
 * native intrinsic agree for every argument value. */
SIMDE_FUNCTION_ATTRIBUTES
uint32_t
simde_bextr_u32(uint32_t a, unsigned int start, unsigned int len) {
  const uint32_t mask = 0xffffffffu;

  /* The hardware packs start/len into one byte each. */
  start &= 0xffu;
  len &= 0xffu;

  if (len == 0 || start >= 32) return 0;
  /* Clamp so the shift below stays within [0, 31]. */
  if (len > 32 - start) len = 32 - start;
  return (a >> start) & (mask >> (32 - len));
}
/* With native BMI1, calls go straight to the compiler intrinsic. */
#if defined(SIMDE_X86_BMI1_NATIVE)
  #define simde_bextr_u32(a, start, len) _bextr_u32(a, start, len)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI1_ENABLE_NATIVE_ALIASES)
  #undef _bextr_u32
  #define _bextr_u32(a, start, len) simde_bextr_u32(a, start, len)
#endif
155+
156+
/* Bit field extract (BMI1 BEXTR), 64-bit.
 *
 * Returns `len` contiguous bits of `a` starting at bit `start`,
 * right-aligned and zero-extended. A field that runs past bit 63 is
 * truncated at bit 63; a zero length or a start of 64 or more yields 0.
 *
 * Only the low 8 bits of `start` and `len` are significant, matching
 * the instruction's control encoding, so the portable path and the
 * native intrinsic agree for every argument value. */
SIMDE_FUNCTION_ATTRIBUTES
uint64_t
simde_bextr_u64(uint64_t a, unsigned int start, unsigned int len) {
  const uint64_t mask = 0xffffffffffffffffull;

  /* The hardware packs start/len into one byte each. */
  start &= 0xffu;
  len &= 0xffu;

  if (len == 0 || start >= 64) return 0;
  /* Clamp so the shift below stays within [0, 63]. */
  if (len > 64 - start) len = 64 - start;
  return (a >> start) & (mask >> (64 - len));
}
/* The 64-bit intrinsic is only declared by compilers when targeting
 * x86-64, so the native override must not be used on 32-bit x86 even
 * when BMI1 itself is enabled; the portable function is used there. */
#if defined(SIMDE_X86_BMI1_NATIVE) && defined(SIMDE_ARCH_AMD64)
  #define simde_bextr_u64(a, start, len) _bextr_u64(a, start, len)
#endif
/* Expose the Intel-style name as an alias for the SIMDe function. */
#if defined(SIMDE_X86_BMI1_ENABLE_NATIVE_ALIASES)
  #undef _bextr_u64
  #define _bextr_u64(a, start, len) simde_bextr_u64(a, start, len)
#endif
171+
172+
#endif /* !defined(SIMDE_X86_BMI_H) */

0 commit comments

Comments
 (0)