Skip to content

Commit 604c482

Browse files
stomfaig authored and github-actions[bot] committed
Automerge: [Clang][X86] allow VPERMILPD/S imm intrinsics to be used in constexpr (#168044)
Resolves #166529
2 parents 234b9fb + 50791c3 commit 604c482

File tree

6 files changed: +139 -6 lines changed (139 additions, 6 deletions)

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -512,7 +512,7 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
512512
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
513513
}
514514

515-
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
515+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
516516
def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
517517
def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
518518
}
@@ -528,6 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
528528
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
529529
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
530530
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
531+
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
532+
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
531533

532534
foreach Op = ["hadd", "hsub"] in {
533535
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
@@ -536,8 +538,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
536538
}
537539

538540
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
539-
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
540-
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
541541
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
542542
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
543543
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
@@ -2375,10 +2375,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
23752375
def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
23762376
def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
23772377
}
2378-
2379-
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2378+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
23802379
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
23812380
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
2381+
}
2382+
2383+
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
23822384
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
23832385
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
23842386
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4620,6 +4620,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
46204620
case X86::BI__builtin_ia32_pshufd:
46214621
case X86::BI__builtin_ia32_pshufd256:
46224622
case X86::BI__builtin_ia32_pshufd512:
4623+
case X86::BI__builtin_ia32_vpermilps:
4624+
case X86::BI__builtin_ia32_vpermilps256:
4625+
case X86::BI__builtin_ia32_vpermilps512:
46234626
return interp__builtin_ia32_shuffle_generic(
46244627
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
46254628
unsigned LaneBase = (DstIdx / 4) * 4;
@@ -4628,6 +4631,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
46284631
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
46294632
});
46304633

4634+
case X86::BI__builtin_ia32_vpermilpd:
4635+
case X86::BI__builtin_ia32_vpermilpd256:
4636+
case X86::BI__builtin_ia32_vpermilpd512:
4637+
return interp__builtin_ia32_shuffle_generic(
4638+
S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
4639+
unsigned NumElemPerLane = 2;
4640+
unsigned BitsPerElem = 1;
4641+
unsigned MaskBits = 8;
4642+
unsigned IndexMask = 0x1;
4643+
unsigned Lane = DstIdx / NumElemPerLane;
4644+
unsigned LaneOffset = Lane * NumElemPerLane;
4645+
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
4646+
unsigned Index = (Control >> BitIndex) & IndexMask;
4647+
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
4648+
});
4649+
46314650
case X86::BI__builtin_ia32_kandqi:
46324651
case X86::BI__builtin_ia32_kandhi:
46334652
case X86::BI__builtin_ia32_kandsi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13023,7 +13023,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1302313023

1302413024
case X86::BI__builtin_ia32_pshufd:
1302513025
case X86::BI__builtin_ia32_pshufd256:
13026-
case X86::BI__builtin_ia32_pshufd512: {
13026+
case X86::BI__builtin_ia32_pshufd512:
13027+
case X86::BI__builtin_ia32_vpermilps:
13028+
case X86::BI__builtin_ia32_vpermilps256:
13029+
case X86::BI__builtin_ia32_vpermilps512: {
1302713030
APValue R;
1302813031
if (!evalShuffleGeneric(
1302913032
Info, E, R,
@@ -13040,6 +13043,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1304013043
return Success(R, E);
1304113044
}
1304213045

13046+
case X86::BI__builtin_ia32_vpermilpd:
13047+
case X86::BI__builtin_ia32_vpermilpd256:
13048+
case X86::BI__builtin_ia32_vpermilpd512: {
13049+
APValue R;
13050+
if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
13051+
unsigned NumElemPerLane = 2;
13052+
unsigned BitsPerElem = 1;
13053+
unsigned MaskBits = 8;
13054+
unsigned IndexMask = 0x1;
13055+
unsigned Lane = DstIdx / NumElemPerLane;
13056+
unsigned LaneOffset = Lane * NumElemPerLane;
13057+
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
13058+
unsigned Index = (Control >> BitIndex) & IndexMask;
13059+
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
13060+
}))
13061+
return false;
13062+
return Success(R, E);
13063+
}
13064+
1304313065
case X86::BI__builtin_ia32_phminposuw128: {
1304413066
APValue Source;
1304513067
if (!Evaluate(Source, Info, E->getArg(0)))

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,31 +1400,36 @@ __m128d test_mm_permute_pd(__m128d A) {
14001400
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
14011401
return _mm_permute_pd(A, 1);
14021402
}
1403+
TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 1.0));
14031404

14041405
__m256d test_mm256_permute_pd(__m256d A) {
14051406
// CHECK-LABEL: test_mm256_permute_pd
14061407
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
14071408
return _mm256_permute_pd(A, 5);
14081409
}
1410+
TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f));
14091411

14101412
__m128 test_mm_permute_ps(__m128 A) {
14111413
// CHECK-LABEL: test_mm_permute_ps
14121414
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
14131415
return _mm_permute_ps(A, 0x1b);
14141416
}
1417+
TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0x1b), 4.0, 3.0, 2.0, 1.0));
14151418

14161419
// Test case for PR12401
14171420
__m128 test2_mm_permute_ps(__m128 a) {
14181421
// CHECK-LABEL: test2_mm_permute_ps
14191422
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
14201423
return _mm_permute_ps(a, 0xe6);
14211424
}
1425+
TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0xe6), 3.0, 2.0, 3.0, 4.0));
14221426

14231427
__m256 test_mm256_permute_ps(__m256 A) {
14241428
// CHECK-LABEL: test_mm256_permute_ps
14251429
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
14261430
return _mm256_permute_ps(A, 0x1b);
14271431
}
1432+
TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0));
14281433

14291434
__m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
14301435
// CHECK-LABEL: test_mm256_permute2f128_pd

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5516,40 +5516,72 @@ __m512d test_mm512_permute_pd(__m512d __X) {
55165516
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
55175517
return _mm512_permute_pd(__X, 2);
55185518
}
5519+
TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0));
55195520

55205521
__m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
55215522
// CHECK-LABEL: test_mm512_mask_permute_pd
55225523
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
55235524
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
55245525
return _mm512_mask_permute_pd(__W, __U, __X, 2);
55255526
}
5527+
TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd(
5528+
((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
5529+
(__mmask8)0b01010100,
5530+
((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
5531+
2),
5532+
0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0
5533+
));
55265534

55275535
__m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
55285536
// CHECK-LABEL: test_mm512_maskz_permute_pd
55295537
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
55305538
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
55315539
return _mm512_maskz_permute_pd(__U, __X, 2);
55325540
}
5541+
TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd(
5542+
(__mmask8)0b01010100,
5543+
((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
5544+
2),
5545+
0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0
5546+
));
55335547

55345548
__m512 test_mm512_permute_ps(__m512 __X) {
55355549
// CHECK-LABEL: test_mm512_permute_ps
55365550
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
55375551
return _mm512_permute_ps(__X, 2);
55385552
}
5553+
TEST_CONSTEXPR(match_m512(_mm512_permute_ps(
5554+
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
5555+
2),
5556+
2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12
5557+
));
55395558

55405559
__m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
55415560
// CHECK-LABEL: test_mm512_mask_permute_ps
55425561
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
55435562
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
55445563
return _mm512_mask_permute_ps(__W, __U, __X, 2);
55455564
}
5565+
TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps(
5566+
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
5567+
(__mmask16)0b1010101010101010,
5568+
((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}),
5569+
2),
5570+
0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28
5571+
));
55465572

55475573
__m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
55485574
// CHECK-LABEL: test_mm512_maskz_permute_ps
55495575
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
55505576
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
55515577
return _mm512_maskz_permute_ps(__U, __X, 2);
55525578
}
5579+
TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps(
5580+
(__mmask16)0b1010101010101010,
5581+
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
5582+
2),
5583+
0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12
5584+
));
55535585

55545586
__m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
55555587
// CHECK-LABEL: test_mm512_permutevar_pd

clang/test/CodeGen/X86/avx512vl-builtins.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8022,55 +8022,108 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
80228022
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
80238023
return _mm_mask_permute_pd(__W, __U, __X, 1);
80248024
}
8025+
TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd(
8026+
((__m128d){0.0, 1.0}),
8027+
(__mmask8)0b10,
8028+
((__m128d){2.0, 3.0}),
8029+
1),
8030+
0.0, 2.0
8031+
));
80258032

80268033
__m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
80278034
// CHECK-LABEL: test_mm_maskz_permute_pd
80288035
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
80298036
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
80308037
return _mm_maskz_permute_pd(__U, __X, 1);
80318038
}
8039+
TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd(
8040+
(__mmask8)0b10,
8041+
((__m128d){1.0, 2.0}),
8042+
1),
8043+
0.0, 1.0
8044+
));
80328045

80338046
__m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
80348047
// CHECK-LABEL: test_mm256_mask_permute_pd
80358048
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
80368049
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
80378050
return _mm256_mask_permute_pd(__W, __U, __X, 5);
80388051
}
8052+
TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd(
8053+
((__m256d){0.0, 1.0, 2.0, 3.0}),
8054+
(__mmask8)0b1010,
8055+
((__m256d){4.0, 5.0, 6.0, 7.0}),
8056+
5),
8057+
0.0, 4.0, 2.0, 6.0
8058+
));
80398059

80408060
__m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
80418061
// CHECK-LABEL: test_mm256_maskz_permute_pd
80428062
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
80438063
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
80448064
return _mm256_maskz_permute_pd(__U, __X, 5);
80458065
}
8066+
TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd(
8067+
(__mmask8)0b1010,
8068+
((__m256d){4.0, 5.0, 6.0, 7.0}),
8069+
5),
8070+
0.0, 4.0, 0.0, 6.0
8071+
));
80468072

80478073
__m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
80488074
// CHECK-LABEL: test_mm_mask_permute_ps
80498075
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
80508076
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
80518077
return _mm_mask_permute_ps(__W, __U, __X, 0x1b);
80528078
}
8079+
TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps(
8080+
((__m128){0.0, 1.0, 2.0, 3.0}),
8081+
(__mmask8)0b1010,
8082+
((__m128){4.0, 5.0, 6.0, 7.0}),
8083+
0x1b),
8084+
0, 6.0, 2.0, 4.0
8085+
));
80538086

80548087
__m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
80558088
// CHECK-LABEL: test_mm_maskz_permute_ps
80568089
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
80578090
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
80588091
return _mm_maskz_permute_ps(__U, __X, 0x1b);
80598092
}
8093+
TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps(
8094+
(__mmask8)0b1010,
8095+
((__m128){4.0, 5.0, 6.0, 7.0}),
8096+
0x1b),
8097+
0.0, 6.0, 0.0, 4.0
8098+
));
8099+
80608100

80618101
__m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
80628102
// CHECK-LABEL: test_mm256_mask_permute_ps
80638103
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
80648104
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
80658105
return _mm256_mask_permute_ps(__W, __U, __X, 0x1b);
80668106
}
8107+
TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps(
8108+
((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
8109+
(__mmask8)0b10101010,
8110+
((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
8111+
0x1b),
8112+
0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0
8113+
));
80678114

80688115
__m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
80698116
// CHECK-LABEL: test_mm256_maskz_permute_ps
80708117
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
80718118
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
80728119
return _mm256_maskz_permute_ps(__U, __X, 0x1b);
80738120
}
8121+
TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps(
8122+
(__mmask8)0b10101010,
8123+
((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
8124+
0x1b),
8125+
0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0
8126+
));
80748127

80758128
__m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
80768129
// CHECK-LABEL: test_mm_mask_permutevar_pd

0 commit comments

Comments
 (0)