Skip to content

Commit 9adc4f9

Browse files
authored
[X86] Enable MMX unpacklo/unpackhi intrinsics in constexpr (#154149)
Matches the behaviour of the SSE/AVX/AVX512 intrinsics - the MMX unpack intrinsics were missed in #153028
1 parent ddcd3fd commit 9adc4f9

File tree

2 files changed

+26
-28
lines changed

2 files changed

+26
-28
lines changed

clang/lib/Headers/mmintrin.h

Lines changed: 20 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -242,11 +242,10 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2)
242242
/// Bits [63:56] are written to bits [63:56] of the result.
243243
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
244244
/// values.
245-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
246-
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
247-
{
248-
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
249-
4, 12, 5, 13, 6, 14, 7, 15);
245+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
246+
_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) {
247+
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 12, 5,
248+
13, 6, 14, 7, 15);
250249
}
251250

252251
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -266,11 +265,9 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
266265
/// Bits [63:48] are written to bits [63:48] of the result.
267266
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
268267
/// values.
269-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
270-
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
271-
{
272-
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
273-
2, 6, 3, 7);
268+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
269+
_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) {
270+
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 6, 3, 7);
274271
}
275272

276273
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@@ -288,10 +285,9 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
288285
/// the upper 32 bits of the result.
289286
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
290287
/// values.
291-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
292-
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
293-
{
294-
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
288+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
289+
_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) {
290+
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
295291
}
296292

297293
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@@ -315,11 +311,10 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
315311
/// Bits [31:24] are written to bits [63:56] of the result.
316312
/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
317313
/// values.
318-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
319-
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
320-
{
321-
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
322-
0, 8, 1, 9, 2, 10, 3, 11);
314+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
315+
_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) {
316+
return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8, 1, 9,
317+
2, 10, 3, 11);
323318
}
324319

325320
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -339,11 +334,9 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
339334
/// Bits [31:16] are written to bits [63:48] of the result.
340335
/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
341336
/// values.
342-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
343-
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
344-
{
345-
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
346-
0, 4, 1, 5);
337+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
338+
_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) {
339+
return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4, 1, 5);
347340
}
348341

349342
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@@ -361,10 +354,9 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
361354
/// the upper 32 bits of the result.
362355
/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
363356
/// values.
364-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
365-
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
366-
{
367-
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
357+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
358+
_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) {
359+
return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
368360
}
369361

370362
/// Adds each 8-bit integer element of the first 64-bit integer vector

clang/test/CodeGen/X86/mmx-builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,36 +764,42 @@ __m64 test_mm_unpackhi_pi8(__m64 a, __m64 b) {
764764
// CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
765765
return _mm_unpackhi_pi8(a, b);
766766
}
767+
TEST_CONSTEXPR(match_v8qi(_mm_unpackhi_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 4, 12, 5, 13, 6, 14, 7, 15));
767768

768769
__m64 test_mm_unpackhi_pi16(__m64 a, __m64 b) {
769770
// CHECK-LABEL: test_mm_unpackhi_pi16
770771
// CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
771772
return _mm_unpackhi_pi16(a, b);
772773
}
774+
TEST_CONSTEXPR(match_v4hi(_mm_unpackhi_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 2, 6, 3, 7));
773775

774776
__m64 test_mm_unpackhi_pi32(__m64 a, __m64 b) {
775777
// CHECK-LABEL: test_mm_unpackhi_pi32
776778
// CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 1, i32 3>
777779
return _mm_unpackhi_pi32(a, b);
778780
}
781+
TEST_CONSTEXPR(match_v2si(_mm_unpackhi_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 1, 3));
779782

780783
__m64 test_mm_unpacklo_pi8(__m64 a, __m64 b) {
781784
// CHECK-LABEL: test_mm_unpacklo_pi8
782785
// CHECK: shufflevector <8 x i8> {{%.*}}, <8 x i8> {{%.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
783786
return _mm_unpacklo_pi8(a, b);
784787
}
788+
TEST_CONSTEXPR(match_v8qi(_mm_unpacklo_pi8((__m64)(__v8qi){0, 1, 2, 3, 4, 5, 6, 7}, (__m64)(__v8qi){8, 9, 10, 11, 12, 13, 14, 15}), 0, 8, 1, 9, 2, 10, 3, 11));
785789

786790
__m64 test_mm_unpacklo_pi16(__m64 a, __m64 b) {
787791
// CHECK-LABEL: test_mm_unpacklo_pi16
788792
// CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
789793
return _mm_unpacklo_pi16(a, b);
790794
}
795+
TEST_CONSTEXPR(match_v4hi(_mm_unpacklo_pi16((__m64)(__v4hi){0, 1, 2, 3}, (__m64)(__v4hi){ 4, 5, 6, 7}), 0, 4, 1, 5));
791796

792797
__m64 test_mm_unpacklo_pi32(__m64 a, __m64 b) {
793798
// CHECK-LABEL: test_mm_unpacklo_pi32
794799
// CHECK: shufflevector <2 x i32> {{%.*}}, <2 x i32> {{%.*}}, <2 x i32> <i32 0, i32 2>
795800
return _mm_unpacklo_pi32(a, b);
796801
}
802+
TEST_CONSTEXPR(match_v2si(_mm_unpacklo_pi32((__m64)(__v2si){0, 1}, (__m64)(__v2si){2, 3}), 0, 2));
797803

798804
__m64 test_mm_xor_si64(__m64 a, __m64 b) {
799805
// CHECK-LABEL: test_mm_xor_si64

0 commit comments

Comments
 (0)