Skip to content

Commit 952b123

Browse files
authored
[X86] Allow PSHUFD/PSHUFLW/PSHUFW intrinsics in constexpr. (#161210)
The i16/i32 shuffle intrinsics (`pshufw`, `pshuflw`, `pshufhw`, `pshufd`) currently cannot be used in constant expressions. This patch adds support in both bytecode interpreter (InterpBuiltin.cpp) and constant evaluator (ExprConstant.cpp) for pshuf intrinsics, enabling their use in constant expressions. ## Intrinsics covered - `_mm_shuffle_pi16` (MMX `pshufw`) - `_mm_shufflelo_epi16` / `_mm_shufflehi_epi16` - `_mm_shuffle_epi32` - Their AVX2/AVX512 vector-width variants - Masked and maskz forms (handled indirectly via `__builtin_ia32_select*`) Fixes #156611
1 parent 3d81008 commit 952b123

File tree

10 files changed

+245
-21
lines changed

10 files changed

+245
-21
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -216,10 +216,13 @@ let Features = "sse2", Attributes = [NoThrow] in {
216216
def movnti : X86Builtin<"void(int *, int)">;
217217
}
218218

219-
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
220-
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
219+
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
221220
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
221+
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
222222
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
223+
}
224+
225+
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
223226
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
224227
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
225228
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
@@ -584,9 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
584587
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
585588
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
586589
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
587-
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
588-
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
589-
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
590590
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
591591
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
592592
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -647,6 +647,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
647647
def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
648648
def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
649649
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
650+
651+
def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
652+
def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">;
653+
def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">;
650654
}
651655

652656
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
@@ -1017,6 +1021,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
10171021
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
10181022
def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
10191023
def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">;
1024+
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
10201025
}
10211026

10221027
let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -1990,13 +1995,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
19901995
}
19911996

19921997
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
1993-
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
1994-
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
19951998
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
19961999
}
19972000

19982001
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
19992002
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
2003+
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
2004+
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
20002005
}
20012006

20022007
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
@@ -2026,8 +2031,7 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req
20262031
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
20272032
}
20282033

2029-
let Features = "avx512f",
2030-
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2034+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
20312035
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
20322036
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
20332037
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
@@ -3266,7 +3270,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
32663270
}
32673271

32683272
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
3269-
def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">;
32703273
def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">;
32713274
def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">;
32723275
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2773,6 +2773,50 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
27732773
return true;
27742774
}
27752775

2776+
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
2777+
const CallExpr *Call, bool IsShufHW) {
2778+
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
2779+
APSInt ControlImm = popToAPSInt(S, Call->getArg(1));
2780+
const Pointer &Src = S.Stk.pop<Pointer>();
2781+
const Pointer &Dst = S.Stk.peek<Pointer>();
2782+
2783+
unsigned NumElems = Dst.getNumElems();
2784+
PrimType ElemT = Dst.getFieldDesc()->getPrimType();
2785+
2786+
unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8);
2787+
if (ElemBits != 16 && ElemBits != 32)
2788+
return false;
2789+
2790+
unsigned LaneElts = 128u / ElemBits;
2791+
assert(LaneElts && (NumElems % LaneElts == 0));
2792+
2793+
uint8_t Ctl = static_cast<uint8_t>(ControlImm.getZExtValue());
2794+
2795+
for (unsigned Idx = 0; Idx != NumElems; Idx++) {
2796+
unsigned LaneBase = (Idx / LaneElts) * LaneElts;
2797+
unsigned LaneIdx = Idx % LaneElts;
2798+
unsigned SrcIdx = Idx;
2799+
unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3;
2800+
if (ElemBits == 32) {
2801+
SrcIdx = LaneBase + Sel;
2802+
} else {
2803+
constexpr unsigned HalfSize = 4;
2804+
bool InHigh = LaneIdx >= HalfSize;
2805+
if (!IsShufHW && !InHigh) {
2806+
SrcIdx = LaneBase + Sel;
2807+
} else if (IsShufHW && InHigh) {
2808+
unsigned Rel = LaneIdx - HalfSize;
2809+
Sel = (Ctl >> (2 * Rel)) & 0x3;
2810+
SrcIdx = LaneBase + HalfSize + Sel;
2811+
}
2812+
}
2813+
2814+
INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(Idx) = Src.elem<T>(SrcIdx); });
2815+
}
2816+
Dst.initializeAllElements();
2817+
return true;
2818+
}
2819+
27762820
static bool interp__builtin_elementwise_triop(
27772821
InterpState &S, CodePtr OpPC, const CallExpr *Call,
27782822
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3661,6 +3705,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
36613705
case X86::BI__builtin_ia32_selectpd_512:
36623706
return interp__builtin_select(S, OpPC, Call);
36633707

3708+
case X86::BI__builtin_ia32_pshuflw:
3709+
case X86::BI__builtin_ia32_pshuflw256:
3710+
case X86::BI__builtin_ia32_pshuflw512:
3711+
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
3712+
3713+
case X86::BI__builtin_ia32_pshufhw:
3714+
case X86::BI__builtin_ia32_pshufhw256:
3715+
case X86::BI__builtin_ia32_pshufhw512:
3716+
return interp__builtin_ia32_pshuf(S, OpPC, Call, true);
3717+
3718+
case X86::BI__builtin_ia32_pshufd:
3719+
case X86::BI__builtin_ia32_pshufd256:
3720+
case X86::BI__builtin_ia32_pshufd512:
3721+
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
3722+
36643723
case X86::BI__builtin_ia32_kandqi:
36653724
case X86::BI__builtin_ia32_kandhi:
36663725
case X86::BI__builtin_ia32_kandsi:

clang/lib/AST/ExprConstant.cpp

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11615,6 +11615,60 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161511615
return true;
1161611616
}
1161711617

11618+
static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
11619+
bool IsShufHW, APValue &Out) {
11620+
APValue Vec;
11621+
APSInt Imm;
11622+
if (!EvaluateAsRValue(Info, Call->getArg(0), Vec))
11623+
return false;
11624+
if (!EvaluateInteger(Call->getArg(1), Imm, Info))
11625+
return false;
11626+
11627+
const auto *VT = Call->getType()->getAs<VectorType>();
11628+
if (!VT)
11629+
return false;
11630+
11631+
QualType ElemT = VT->getElementType();
11632+
unsigned ElemBits = Info.Ctx.getTypeSize(ElemT);
11633+
unsigned NumElts = VT->getNumElements();
11634+
11635+
unsigned LaneBits = 128u;
11636+
unsigned LaneElts = LaneBits / ElemBits;
11637+
if (!LaneElts || (NumElts % LaneElts) != 0)
11638+
return false;
11639+
11640+
uint8_t Ctl = static_cast<uint8_t>(Imm.getZExtValue());
11641+
11642+
SmallVector<APValue, 32> ResultElements;
11643+
ResultElements.reserve(NumElts);
11644+
11645+
for (unsigned Idx = 0; Idx != NumElts; Idx++) {
11646+
unsigned LaneBase = (Idx / LaneElts) * LaneElts;
11647+
unsigned LaneIdx = Idx % LaneElts;
11648+
unsigned SrcIdx = Idx;
11649+
unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3;
11650+
11651+
if (ElemBits == 32) {
11652+
SrcIdx = LaneBase + Sel;
11653+
} else {
11654+
constexpr unsigned HalfSize = 4;
11655+
bool InHigh = LaneIdx >= HalfSize;
11656+
if (!IsShufHW && !InHigh) {
11657+
SrcIdx = LaneBase + Sel;
11658+
} else if (IsShufHW && InHigh) {
11659+
unsigned Rel = LaneIdx - HalfSize;
11660+
Sel = (Ctl >> (2 * Rel)) & 0x3;
11661+
SrcIdx = LaneBase + HalfSize + Sel;
11662+
}
11663+
}
11664+
11665+
ResultElements.push_back(Vec.getVectorElt(SrcIdx));
11666+
}
11667+
11668+
Out = APValue(ResultElements.data(), ResultElements.size());
11669+
return true;
11670+
}
11671+
1161811672
bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1161911673
if (!IsConstantEvaluatedBuiltinCall(E))
1162011674
return ExprEvaluatorBaseTy::VisitCallExpr(E);
@@ -11868,7 +11922,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1186811922

1186911923
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1187011924
}
11871-
1187211925
case clang::X86::BI__builtin_ia32_vprotbi:
1187311926
case clang::X86::BI__builtin_ia32_vprotdi:
1187411927
case clang::X86::BI__builtin_ia32_vprotqi:
@@ -12087,6 +12140,34 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1208712140

1208812141
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1208912142
}
12143+
12144+
case X86::BI__builtin_ia32_pshuflw:
12145+
case X86::BI__builtin_ia32_pshuflw256:
12146+
case X86::BI__builtin_ia32_pshuflw512: {
12147+
APValue R;
12148+
if (!evalPshufBuiltin(Info, E, false, R))
12149+
return false;
12150+
return Success(R, E);
12151+
}
12152+
12153+
case X86::BI__builtin_ia32_pshufhw:
12154+
case X86::BI__builtin_ia32_pshufhw256:
12155+
case X86::BI__builtin_ia32_pshufhw512: {
12156+
APValue R;
12157+
if (!evalPshufBuiltin(Info, E, true, R))
12158+
return false;
12159+
return Success(R, E);
12160+
}
12161+
12162+
case X86::BI__builtin_ia32_pshufd:
12163+
case X86::BI__builtin_ia32_pshufd256:
12164+
case X86::BI__builtin_ia32_pshufd512: {
12165+
APValue R;
12166+
if (!evalPshufBuiltin(Info, E, false, R))
12167+
return false;
12168+
return Success(R, E);
12169+
}
12170+
1209012171
case Builtin::BI__builtin_elementwise_clzg:
1209112172
case Builtin::BI__builtin_elementwise_ctzg: {
1209212173
APValue SourceLHS;

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,19 +1109,19 @@ __m256i test_mm256_shuffle_epi32(__m256i a) {
11091109
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>
11101110
return _mm256_shuffle_epi32(a, 15);
11111111
}
1112-
1112+
TEST_CONSTEXPR(match_v8si(_mm256_shuffle_epi32((((__m256i)(__v8si){0,1,2,3,4,5,6,7})), 15), 3,3,0,0, 7,7,4,4));
11131113
__m256i test_mm256_shufflehi_epi16(__m256i a) {
11141114
// CHECK-LABEL: test_mm256_shufflehi_epi16
11151115
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 14, i32 13>
11161116
return _mm256_shufflehi_epi16(a, 107);
11171117
}
1118-
1118+
TEST_CONSTEXPR(match_v16hi(_mm256_shufflehi_epi16((((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 107), 0,1,2,3, 7,6,6,5, 8,9,10,11, 15,14,14,13));
11191119
__m256i test_mm256_shufflelo_epi16(__m256i a) {
11201120
// CHECK-LABEL: test_mm256_shufflelo_epi16
11211121
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> <i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, i32 9, i32 12, i32 13, i32 14, i32 15>
11221122
return _mm256_shufflelo_epi16(a, 83);
11231123
}
1124-
1124+
TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) );
11251125
__m256i test_mm256_sign_epi8(__m256i a, __m256i b) {
11261126
// CHECK-LABEL: test_mm256_sign_epi8
11271127
// CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1876,40 +1876,49 @@ __m512i test_mm512_shufflehi_epi16(__m512i __A) {
18761876
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
18771877
return _mm512_shufflehi_epi16(__A, 5);
18781878
}
1879-
1879+
TEST_CONSTEXPR(match_v32hi(_mm512_shufflehi_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,2,3, 5,5,4,4, 8,9,10,11, 13,13,12,12, 16,17,18,19, 21,21,20,20, 24,25,26,27, 29,29,28,28));
18801880
__m512i test_mm512_mask_shufflehi_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
18811881
// CHECK-LABEL: test_mm512_mask_shufflehi_epi16
18821882
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
18831883
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
18841884
return _mm512_mask_shufflehi_epi16(__W, __U, __A, 5);
18851885
}
1886+
TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16((((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131})), 0xFFFF0000u, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, 16,17,18,19,21,21,20,20, 24,25,26,27,29,29,28,28));
1887+
TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131));
18861888

18871889
__m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) {
18881890
// CHECK-LABEL: test_mm512_maskz_shufflehi_epi16
18891891
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, i32 28>
18901892
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
18911893
return _mm512_maskz_shufflehi_epi16(__U, __A, 5);
18921894
}
1895+
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0xAAAAAAAAu, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,0,3,0,5,0,4, 0,9,0,11,0,13,0,12, 0,17,0,19,0,21,0,20, 0,25,0,27,0,29,0,28));
1896+
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
18931897

18941898
__m512i test_mm512_shufflelo_epi16(__m512i __A) {
18951899
// CHECK-LABEL: test_mm512_shufflelo_epi16
18961900
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
18971901
return _mm512_shufflelo_epi16(__A, 5);
18981902
}
1903+
TEST_CONSTEXPR( match_v32hi(_mm512_shufflelo_epi16(((__m512i)(__v32hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31}), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
18991904

19001905
__m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
19011906
// CHECK-LABEL: test_mm512_mask_shufflelo_epi16
19021907
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
19031908
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
19041909
return _mm512_mask_shufflelo_epi16(__W, __U, __A, 5);
19051910
}
1911+
TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
1912+
TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131));
19061913

19071914
__m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) {
19081915
// CHECK-LABEL: test_mm512_maskz_shufflelo_epi16
19091916
// CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
19101917
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
19111918
return _mm512_maskz_shufflelo_epi16(__U, __A, 5);
19121919
}
1920+
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31));
1921+
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
19131922

19141923
__m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) {
19151924
// CHECK-LABEL: test_mm512_sllv_epi16

0 commit comments

Comments
 (0)