Skip to content

Commit 3e462e9

Browse files
committed
Allow SSE/AVX/AVX512 unpck intrinsics to be used in constexpr
1 parent 40f4e9e commit 3e462e9

File tree

6 files changed

+60
-52
lines changed

6 files changed

+60
-52
lines changed

clang/lib/Headers/avx2intrin.h

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,9 +2720,8 @@ _mm256_subs_epu16(__m256i __a, __m256i __b)
27202720
/// A 256-bit integer vector used as the source for the odd-numbered bytes
27212721
/// of the result.
27222722
/// \returns A 256-bit integer vector containing the result.
2723-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2724-
_mm256_unpackhi_epi8(__m256i __a, __m256i __b)
2725-
{
2723+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2724+
_mm256_unpackhi_epi8(__m256i __a, __m256i __b) {
27262725
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
27272726
}
27282727

@@ -2755,9 +2754,8 @@ _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
27552754
/// A 256-bit vector of [16 x i16] used as the source for the odd-numbered
27562755
/// elements of the result.
27572756
/// \returns A 256-bit vector of [16 x i16] containing the result.
2758-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2759-
_mm256_unpackhi_epi16(__m256i __a, __m256i __b)
2760-
{
2757+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2758+
_mm256_unpackhi_epi16(__m256i __a, __m256i __b) {
27612759
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
27622760
}
27632761

@@ -2789,9 +2787,8 @@ _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
27892787
/// A 256-bit vector of [8 x i32] used as the source for the odd-numbered
27902788
/// elements of the result.
27912789
/// \returns A 256-bit vector of [8 x i32] containing the result.
2792-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2793-
_mm256_unpackhi_epi32(__m256i __a, __m256i __b)
2794-
{
2790+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2791+
_mm256_unpackhi_epi32(__m256i __a, __m256i __b) {
27952792
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
27962793
}
27972794

@@ -2819,9 +2816,8 @@ _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
28192816
/// A 256-bit vector of [4 x i64] used as the source for the odd-numbered
28202817
/// elements of the result.
28212818
/// \returns A 256-bit vector of [4 x i64] containing the result.
2822-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2823-
_mm256_unpackhi_epi64(__m256i __a, __m256i __b)
2824-
{
2819+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2820+
_mm256_unpackhi_epi64(__m256i __a, __m256i __b) {
28252821
return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);
28262822
}
28272823

@@ -2853,9 +2849,8 @@ _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
28532849
/// A 256-bit integer vector used as the source for the odd-numbered bytes
28542850
/// of the result.
28552851
/// \returns A 256-bit integer vector containing the result.
2856-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2857-
_mm256_unpacklo_epi8(__m256i __a, __m256i __b)
2858-
{
2852+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2853+
_mm256_unpacklo_epi8(__m256i __a, __m256i __b) {
28592854
return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
28602855
}
28612856

@@ -2888,9 +2883,8 @@ _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
28882883
/// A 256-bit vector of [16 x i16] used as the source for the odd-numbered
28892884
/// elements of the result.
28902885
/// \returns A 256-bit vector of [16 x i16] containing the result.
2891-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2892-
_mm256_unpacklo_epi16(__m256i __a, __m256i __b)
2893-
{
2886+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2887+
_mm256_unpacklo_epi16(__m256i __a, __m256i __b) {
28942888
return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
28952889
}
28962890

@@ -2922,9 +2916,8 @@ _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
29222916
/// A 256-bit vector of [8 x i32] used as the source for the odd-numbered
29232917
/// elements of the result.
29242918
/// \returns A 256-bit vector of [8 x i32] containing the result.
2925-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2926-
_mm256_unpacklo_epi32(__m256i __a, __m256i __b)
2927-
{
2919+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2920+
_mm256_unpacklo_epi32(__m256i __a, __m256i __b) {
29282921
return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
29292922
}
29302923

@@ -2952,9 +2945,8 @@ _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
29522945
/// A 256-bit vector of [4 x i64] used as the source for the odd-numbered
29532946
/// elements of the result.
29542947
/// \returns A 256-bit vector of [4 x i64] containing the result.
2955-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
2956-
_mm256_unpacklo_epi64(__m256i __a, __m256i __b)
2957-
{
2948+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
2949+
_mm256_unpacklo_epi64(__m256i __a, __m256i __b) {
29582950
return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);
29592951
}
29602952

clang/lib/Headers/avx512fintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4203,9 +4203,8 @@ _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
42034203
(__v16sf)_mm512_setzero_ps());
42044204
}
42054205

4206-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
4207-
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4208-
{
4206+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4207+
_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
42094208
return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
42104209
2, 18, 3, 19,
42114210
2+4, 18+4, 3+4, 19+4,
@@ -4229,9 +4228,8 @@ _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
42294228
(__v16si)_mm512_setzero_si512());
42304229
}
42314230

4232-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
4233-
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4234-
{
4231+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4232+
_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
42354233
return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
42364234
0, 16, 1, 17,
42374235
0+4, 16+4, 1+4, 17+4,
@@ -4255,9 +4253,8 @@ _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
42554253
(__v16si)_mm512_setzero_si512());
42564254
}
42574255

4258-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
4259-
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4260-
{
4256+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4257+
_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
42614258
return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
42624259
1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
42634260
}
@@ -4278,9 +4275,8 @@ _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
42784275
(__v8di)_mm512_setzero_si512());
42794276
}
42804277

4281-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
4282-
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4283-
{
4278+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
4279+
_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
42844280
return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
42854281
0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
42864282
}

clang/lib/Headers/emmintrin.h

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4417,8 +4417,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
44174417
/// Bits [119:112] are written to bits [111:104] of the result. \n
44184418
/// Bits [127:120] are written to bits [127:120] of the result.
44194419
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4420-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a,
4421-
__m128i __b) {
4420+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4421+
_mm_unpackhi_epi8(__m128i __a, __m128i __b) {
44224422
return (__m128i)__builtin_shufflevector(
44234423
(__v16qi)__a, (__v16qi)__b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
44244424
16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
@@ -4445,8 +4445,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a,
44454445
/// Bits [111:96] are written to bits [95:80] of the result. \n
44464446
/// Bits [127:112] are written to bits [127:112] of the result.
44474447
/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4448-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a,
4449-
__m128i __b) {
4448+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4449+
_mm_unpackhi_epi16(__m128i __a, __m128i __b) {
44504450
return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8 + 4, 5,
44514451
8 + 5, 6, 8 + 6, 7, 8 + 7);
44524452
}
@@ -4468,8 +4468,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a,
44684468
///    Bits [95:64] are written to bits [63:32] of the destination. \n
44694469
/// Bits [127:96] are written to bits [127:96] of the destination.
44704470
/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4471-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a,
4472-
__m128i __b) {
4471+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4472+
_mm_unpackhi_epi32(__m128i __a, __m128i __b) {
44734473
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4 + 2, 3,
44744474
4 + 3);
44754475
}
@@ -4489,8 +4489,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a,
44894489
/// A 128-bit vector of [2 x i64]. \n
44904490
/// Bits [127:64] are written to bits [127:64] of the destination.
44914491
/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4492-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a,
4493-
__m128i __b) {
4492+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4493+
_mm_unpackhi_epi64(__m128i __a, __m128i __b) {
44944494
return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2 + 1);
44954495
}
44964496

@@ -4523,8 +4523,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a,
45234523
/// Bits [55:48] are written to bits [111:104] of the result. \n
45244524
/// Bits [63:56] are written to bits [127:120] of the result.
45254525
/// \returns A 128-bit vector of [16 x i8] containing the interleaved values.
4526-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a,
4527-
__m128i __b) {
4526+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4527+
_mm_unpacklo_epi8(__m128i __a, __m128i __b) {
45284528
return (__m128i)__builtin_shufflevector(
45294529
(__v16qi)__a, (__v16qi)__b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
45304530
16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
@@ -4552,8 +4552,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a,
45524552
/// Bits [47:32] are written to bits [95:80] of the result. \n
45534553
/// Bits [63:48] are written to bits [127:112] of the result.
45544554
/// \returns A 128-bit vector of [8 x i16] containing the interleaved values.
4555-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a,
4556-
__m128i __b) {
4555+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4556+
_mm_unpacklo_epi16(__m128i __a, __m128i __b) {
45574557
return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8 + 0, 1,
45584558
8 + 1, 2, 8 + 2, 3, 8 + 3);
45594559
}
@@ -4575,8 +4575,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a,
45754575
///    Bits [31:0] are written to bits [63:32] of the destination. \n
45764576
/// Bits [63:32] are written to bits [127:96] of the destination.
45774577
/// \returns A 128-bit vector of [4 x i32] containing the interleaved values.
4578-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a,
4579-
__m128i __b) {
4578+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4579+
_mm_unpacklo_epi32(__m128i __a, __m128i __b) {
45804580
return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4 + 0, 1,
45814581
4 + 1);
45824582
}
@@ -4596,8 +4596,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a,
45964596
/// A 128-bit vector of [2 x i64]. \n
45974597
/// Bits [63:0] are written to bits [127:64] of the destination. \n
45984598
/// \returns A 128-bit vector of [2 x i64] containing the interleaved values.
4599-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a,
4600-
__m128i __b) {
4599+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
4600+
_mm_unpacklo_epi64(__m128i __a, __m128i __b) {
46014601
return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2 + 0);
46024602
}
46034603

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1319,48 +1319,56 @@ __m256i test_mm256_unpackhi_epi8(__m256i a, __m256i b) {
13191319
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
13201320
return _mm256_unpackhi_epi8(a, b);
13211321
}
1322+
TEST_CONSTEXPR(match_v32qi(_mm256_unpackhi_epi8((__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m256i)(__v32qi){32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63));
13221323

13231324
__m256i test_mm256_unpackhi_epi16(__m256i a, __m256i b) {
13241325
// CHECK-LABEL: test_mm256_unpackhi_epi16
13251326
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
13261327
return _mm256_unpackhi_epi16(a, b);
13271328
}
1329+
TEST_CONSTEXPR(match_v16hi(_mm256_unpackhi_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m256i)(__v16hi){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31));
13281330

13291331
__m256i test_mm256_unpackhi_epi32(__m256i a, __m256i b) {
13301332
// CHECK-LABEL: test_mm256_unpackhi_epi32
13311333
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
13321334
return _mm256_unpackhi_epi32(a, b);
13331335
}
1336+
TEST_CONSTEXPR(match_v8si(_mm256_unpackhi_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m256i)(__v8si){8, 9, 10, 11, 12, 13, 14, 15}), 2, 10, 3, 11, 6, 14, 7, 15));
13341337

13351338
__m256i test_mm256_unpackhi_epi64(__m256i a, __m256i b) {
13361339
// CHECK-LABEL: test_mm256_unpackhi_epi64
13371340
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
13381341
return _mm256_unpackhi_epi64(a, b);
13391342
}
1343+
TEST_CONSTEXPR(match_v4di(_mm256_unpackhi_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m256i)(__v4di){ 4, 5, 6, 7}), 1, 5, 3, 7));
13401344

13411345
__m256i test_mm256_unpacklo_epi8(__m256i a, __m256i b) {
13421346
// CHECK-LABEL: test_mm256_unpacklo_epi8
13431347
// CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
13441348
return _mm256_unpacklo_epi8(a, b);
13451349
}
1350+
TEST_CONSTEXPR(match_v32qi(_mm256_unpacklo_epi8((__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, (__m256i)(__v32qi){32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}), 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55));
13461351

13471352
__m256i test_mm256_unpacklo_epi16(__m256i a, __m256i b) {
13481353
// CHECK-LABEL: test_mm256_unpacklo_epi16
13491354
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
13501355
return _mm256_unpacklo_epi16(a, b);
13511356
}
1357+
TEST_CONSTEXPR(match_v16hi(_mm256_unpacklo_epi16((__m256i)(__v16hi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m256i)(__v16hi){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27));
13521358

13531359
__m256i test_mm256_unpacklo_epi32(__m256i a, __m256i b) {
13541360
// CHECK-LABEL: test_mm256_unpacklo_epi32
13551361
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
13561362
return _mm256_unpacklo_epi32(a, b);
13571363
}
1364+
TEST_CONSTEXPR(match_v8si(_mm256_unpacklo_epi32((__m256i)(__v8si){0, 1, 2, 3, 4, 5, 6, 7}, (__m256i)(__v8si){ 8, 9, 10, 11, 12, 13, 14, 15}), 0, 8, 1, 9, 4, 12, 5, 13));
13581365

13591366
__m256i test_mm256_unpacklo_epi64(__m256i a, __m256i b) {
13601367
// CHECK-LABEL: test_mm256_unpacklo_epi64
13611368
// CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
13621369
return _mm256_unpacklo_epi64(a, b);
13631370
}
1371+
TEST_CONSTEXPR(match_v4di(_mm256_unpacklo_epi64((__m256i)(__v4di){0, 1, 2, 3}, (__m256i)(__v4di){ 4, 5, 6, 7}), 0, 4, 2, 6));
13641372

13651373
__m256i test_mm256_xor_si256(__m256i a, __m256i b) {
13661374
// CHECK-LABEL: test_mm256_xor_si256

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4691,6 +4691,7 @@ __m512i test_mm512_unpackhi_epi32(__m512i __A, __m512i __B) {
46914691
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
46924692
return _mm512_unpackhi_epi32(__A, __B);
46934693
}
4694+
TEST_CONSTEXPR(match_v16si(_mm512_unpackhi_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m512i)(__v16si){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31));
46944695

46954696
__m512d test_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) {
46964697
// CHECK-LABEL: test_mm512_maskz_unpackhi_pd
@@ -5477,6 +5478,7 @@ __m512i test_mm512_unpackhi_epi64(__m512i __A, __m512i __B) {
54775478
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
54785479
return _mm512_unpackhi_epi64(__A, __B);
54795480
}
5481+
TEST_CONSTEXPR(match_m512i(_mm512_unpackhi_epi64((__m512i){0, 1, 2, 3, 4, 5, 6, 7}, (__m512i){8, 9, 10, 11, 12, 13, 14, 15}), 1, 9, 3, 11, 5, 13, 7, 15));
54805482

54815483
__m512i test_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
54825484
// CHECK-LABEL: test_mm512_mask_unpackhi_epi64
@@ -5497,6 +5499,7 @@ __m512i test_mm512_unpacklo_epi32(__m512i __A, __m512i __B) {
54975499
// CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
54985500
return _mm512_unpacklo_epi32(__A, __B);
54995501
}
5502+
TEST_CONSTEXPR(match_v16si(_mm512_unpacklo_epi32((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, (__m512i)(__v16si){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 0, 16, 1, 17, 4, 20, 5, 21, 8, 24, 9, 25, 12, 28, 13, 29));
55005503

55015504
__m512i test_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
55025505
// CHECK-LABEL: test_mm512_mask_unpacklo_epi32
@@ -5517,6 +5520,7 @@ __m512i test_mm512_unpacklo_epi64(__m512i __A, __m512i __B) {
55175520
// CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
55185521
return _mm512_unpacklo_epi64(__A, __B);
55195522
}
5523+
TEST_CONSTEXPR(match_m512i(_mm512_unpacklo_epi64((__m512i){0, 1, 2, 3, 4, 5, 6, 7}, (__m512i){8, 9, 10, 11, 12, 13, 14, 15}), 0, 8, 2, 10, 4, 12, 6, 14));
55205524

55215525
__m512i test_mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
55225526
// CHECK-LABEL: test_mm512_mask_unpacklo_epi64

0 commit comments

Comments
 (0)