Skip to content

Commit 3a76747

Browse files
authored
[X86] Allow XOP rotate intrinsics to be used in constexpr (#157643)
Now that they wrap the __builtin_elementwise_fshl/fshr builtins, this is pretty trivial. Another step towards #153152 - I'll handle the AVX512 rotates next.
1 parent 96d5567 commit 3a76747

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

clang/lib/Headers/xopintrin.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,25 +208,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
208208
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
209209
}
210210

211-
static __inline__ __m128i __DEFAULT_FN_ATTRS
211+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
212212
_mm_rot_epi8(__m128i __A, __m128i __B)
213213
{
214214
return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
215215
}
216216

217-
static __inline__ __m128i __DEFAULT_FN_ATTRS
217+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
218218
_mm_rot_epi16(__m128i __A, __m128i __B)
219219
{
220220
return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
221221
}
222222

223-
static __inline__ __m128i __DEFAULT_FN_ATTRS
223+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
224224
_mm_rot_epi32(__m128i __A, __m128i __B)
225225
{
226226
return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
227227
}
228228

229-
static __inline__ __m128i __DEFAULT_FN_ATTRS
229+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
230230
_mm_rot_epi64(__m128i __A, __m128i __B)
231231
{
232232
return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);

clang/test/CodeGen/X86/xop-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,24 +215,28 @@ __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
215215
// CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
216216
return _mm_rot_epi8(a, b);
217217
}
218+
TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0));
218219

219220
__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
220221
// CHECK-LABEL: test_mm_rot_epi16
221222
// CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
222223
return _mm_rot_epi16(a, b);
223224
}
225+
TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0));
224226

225227
__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
226228
// CHECK-LABEL: test_mm_rot_epi32
227229
// CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
228230
return _mm_rot_epi32(a, b);
229231
}
232+
TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));
230233

231234
__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
232235
// CHECK-LABEL: test_mm_rot_epi64
233236
// CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
234237
return _mm_rot_epi64(a, b);
235238
}
239+
TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));
236240

237241
__m128i test_mm_roti_epi8(__m128i a) {
238242
// CHECK-LABEL: test_mm_roti_epi8

0 commit comments

Comments
 (0)