Skip to content

Commit bbee445

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow shufps/shufpd shuffle intrinsics to be used in constexpr
Resolves #161208
1 parent c491c6e commit bbee445

File tree

8 files changed

+158
-7
lines changed

8 files changed

+158
-7
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,14 +197,17 @@ let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDecl
197197
def _mm_sfence : X86LibBuiltin<"void()">;
198198
}
199199

200+
let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
201+
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
202+
}
203+
200204
let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
201205
def rcpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
202206
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
203207
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
204208
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
205209
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
206210
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
207-
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
208211
}
209212

210213
let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
@@ -224,13 +227,13 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
224227
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
225228
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
226229
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
230+
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
227231
}
228232

229233
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
230234
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
231235
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
232236
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
233-
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
234237
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
235238
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
236239
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
@@ -488,13 +491,16 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
488491
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
489492
}
490493

494+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
495+
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
496+
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
497+
}
498+
491499
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
492500
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
493501
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
494502
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
495503
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
496-
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
497-
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
498504
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
499505
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
500506
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2470,6 +2476,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
24702476
def shuf_f64x2 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
24712477
def shuf_i32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
24722478
def shuf_i64x2 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
2479+
}
2480+
2481+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
24732482
def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
24742483
def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
24752484
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3128,6 +3128,52 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
31283128
return true;
31293129
}
31303130

3131+
// Bytecode-interpreter evaluation of a three-argument shuffle builtin
// (vector A, vector B, integer immediate). The result is written element by
// element into the destination pointer that is left on the stack.
//
// The vectors are processed in 128-bit lanes. Within each lane the first half
// of the output elements is selected from A and the second half from B, using
// consecutive selector fields taken from the immediate.
//
// Always returns true.
static bool interp__builtin_ia32_shuf(InterpState &S, CodePtr OpPC,
3132+
const CallExpr *Call) {
3133+
assert(Call->getNumArgs() == 3);
3134+
3135+
// Operands come off the stack in reverse order: the immediate first, then B,
// then A (see the pops below).
unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
3136+
QualType Arg0Type = Call->getArg(0)->getType();
3137+
const auto *VecT = Arg0Type->castAs<VectorType>();
3138+
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
3139+
unsigned NumElems = VecT->getNumElements();
3140+
// NOTE: despite its name, LaneWidth is the bit width of one vector element.
unsigned LaneWidth = S.getContext().getBitWidth(VecT->getElementType());
3141+
// Number of 128-bit lanes in the vector, and elements held by each lane.
unsigned NumLanes = LaneWidth * NumElems / 128;
3142+
unsigned NumElemPerLane = 128 / LaneWidth;
3143+
3144+
const Pointer &B = S.Stk.pop<Pointer>();
3145+
const Pointer &A = S.Stk.pop<Pointer>();
3146+
// The destination is only peeked, so it stays on the stack as the result.
const Pointer &Dst = S.Stk.peek<Pointer>();
3147+
3148+
// Half of each output lane comes from A and half from B.
unsigned NumSelectableElems = NumElemPerLane / 2;
3149+
// A selector is 1 bit wide when only one element per source is picked per
// lane (two elements per lane), otherwise 2 bits wide.
unsigned BitsPerElem = NumSelectableElems == 1 ? 1 : 2;
3150+
unsigned IndexMask = BitsPerElem == 2 ? 0x3 : 0x1;
3151+
// Only the low 8 bits of the immediate are consumed; the bit cursor wraps.
unsigned MaskBits = 8;
3152+
3153+
TYPE_SWITCH(ElemT, {
3154+
unsigned BitIndex = 0;
3155+
unsigned DstIdx = 0;
3156+
3157+
for (unsigned LaneId = 0; LaneId != NumLanes; ++LaneId) {
3158+
// Index of the first element of this lane in A, B and Dst.
unsigned LaneOffset = LaneId * NumElemPerLane;
3159+
3160+
// Low half of the output lane: elements selected from A's lane.
for (unsigned i = 0; i < NumSelectableElems; ++i) {
3161+
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
3162+
Dst.elem<T>(DstIdx++) = A.elem<T>(LaneOffset + Index);
3163+
// Advance to the next selector field, wrapping after MaskBits bits so
// the same immediate bits are reused once they are exhausted.
BitIndex = (BitIndex + BitsPerElem) % MaskBits;
3164+
}
3165+
3166+
// High half of the output lane: elements selected from B's lane.
for (unsigned i = 0; i < NumSelectableElems; ++i) {
3167+
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
3168+
Dst.elem<T>(DstIdx++) = B.elem<T>(LaneOffset + Index);
3169+
BitIndex = (BitIndex + BitsPerElem) % MaskBits;
3170+
}
3171+
}
3172+
});
3173+
Dst.initializeAllElements();
3174+
return true;
3175+
}
3176+
31313177
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
31323178
uint32_t BuiltinID) {
31333179
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4003,6 +4049,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
40034049
case X86::BI__builtin_ia32_selectpd_512:
40044050
return interp__builtin_select(S, OpPC, Call);
40054051

4052+
case X86::BI__builtin_ia32_shufps:
4053+
case X86::BI__builtin_ia32_shufps256:
4054+
case X86::BI__builtin_ia32_shufps512:
4055+
case X86::BI__builtin_ia32_shufpd:
4056+
case X86::BI__builtin_ia32_shufpd256:
4057+
case X86::BI__builtin_ia32_shufpd512:
4058+
return interp__builtin_ia32_shuf(S, OpPC, Call);
4059+
40064060
case X86::BI__builtin_ia32_pshufb128:
40074061
case X86::BI__builtin_ia32_pshufb256:
40084062
case X86::BI__builtin_ia32_pshufb512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11618,6 +11618,61 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161811618
return true;
1161911619
}
1162011620

11621+
// Constant-evaluates a three-argument shuffle builtin call (vector A,
// vector B, integer immediate) and stores the resulting vector in Out.
//
// The vectors are processed in 128-bit lanes. Within each lane the first half
// of the output elements is selected from A and the second half from B, using
// consecutive selector fields taken from the low 8 bits of the immediate.
//
// Returns false if any argument fails to evaluate, if the call's type is not
// a vector type, or if the element count is not a whole number of 128-bit
// lanes; returns true otherwise.
static bool evalShufpspdBuiltin(EvalInfo &Info, const CallExpr *Call,
11622+
APValue &Out) {
11623+
APValue A, B;
11624+
APSInt ShuffleMask;
11625+
if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
11626+
!EvaluateAsRValue(Info, Call->getArg(1), B) ||
11627+
!EvaluateInteger(Call->getArg(2), ShuffleMask, Info))
11628+
return false;
11629+
11630+
// Lane geometry is derived from the call's result type.
const auto *VT = Call->getType()->getAs<VectorType>();
11631+
if (!VT)
11632+
return false;
11633+
11634+
QualType ElemT = VT->getElementType();
11635+
unsigned ElemBits = Info.Ctx.getTypeSize(ElemT);
11636+
unsigned NumElts = VT->getNumElements();
11637+
11638+
constexpr unsigned LaneBits = 128u;
11639+
unsigned NumElemPerLane = LaneBits / ElemBits;
// Reject elements wider than a lane (zero elements per lane) and vectors that
// do not consist of a whole number of lanes.
11640+
if (!NumElemPerLane || (NumElts % NumElemPerLane) != 0)
11641+
return false;
11642+
11643+
unsigned NumLanes = NumElts / NumElemPerLane;
// Only the low 8 bits of the immediate are used as the control byte.
11644+
uint8_t Ctl = static_cast<uint8_t>(ShuffleMask.getZExtValue());
11645+
11646+
// Half of each output lane comes from A and half from B.
unsigned SelectableElts = NumElemPerLane / 2;
11647+
// A selector is 1 bit wide when only one element per source is picked per
// lane (two elements per lane), otherwise 2 bits wide.
unsigned BitsPerSel = SelectableElts == 1 ? 1 : 2;
11648+
unsigned SelMask = (1u << BitsPerSel) - 1;
11649+
unsigned MaskBits = 8;
11650+
11651+
SmallVector<APValue, 16> ResultElements;
11652+
ResultElements.reserve(NumElts);
11653+
11654+
// Bit cursor into Ctl; it carries over from lane to lane and wraps at 8.
unsigned BitIdx = 0;
11655+
11656+
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
11657+
// Index of the first element of this lane in A and B.
unsigned LaneBase = Lane * NumElemPerLane;
11658+
11659+
// Low half of the output lane: elements selected from A's lane.
for (unsigned i = 0; i < SelectableElts; ++i) {
11660+
unsigned SelIdx = (Ctl >> BitIdx) & SelMask;
11661+
ResultElements.push_back(A.getVectorElt(LaneBase + SelIdx));
11662+
// Advance to the next selector field, wrapping after MaskBits bits so the
// same control bits are reused once they are exhausted.
BitIdx = (BitIdx + BitsPerSel) % MaskBits;
11663+
}
11664+
11665+
// High half of the output lane: elements selected from B's lane.
for (unsigned i = 0; i < SelectableElts; ++i) {
11666+
unsigned SelIdx = (Ctl >> BitIdx) & SelMask;
11667+
ResultElements.push_back(B.getVectorElt(LaneBase + SelIdx));
11668+
BitIdx = (BitIdx + BitsPerSel) % MaskBits;
11669+
}
11670+
}
11671+
11672+
Out = APValue(ResultElements.data(), ResultElements.size());
11673+
return true;
11674+
}
11675+
1162111676
static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
1162211677
APValue &Out) {
1162311678
APValue SrcVec, ControlVec;
@@ -12308,7 +12363,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1230812363

1230912364
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1231012365
}
12311-
12366+
case X86::BI__builtin_ia32_shufps:
12367+
case X86::BI__builtin_ia32_shufps256:
12368+
case X86::BI__builtin_ia32_shufps512:
12369+
case X86::BI__builtin_ia32_shufpd:
12370+
case X86::BI__builtin_ia32_shufpd256:
12371+
case X86::BI__builtin_ia32_shufpd512: {
12372+
APValue R;
12373+
if (!evalShufpspdBuiltin(Info, E, R))
12374+
return false;
12375+
return Success(R, E);
12376+
}
1231212377
case X86::BI__builtin_ia32_pshufb128:
1231312378
case X86::BI__builtin_ia32_pshufb256:
1231412379
case X86::BI__builtin_ia32_pshufb512: {

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,12 +1881,16 @@ __m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
18811881
return _mm256_shuffle_pd(A, B, 0);
18821882
}
18831883

1884+
TEST_CONSTEXPR((match_m256d(_mm256_shuffle_pd(((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}), ((__m256d)(__v4df){5.0, 6.0, 7.0, 8.0}), 15), 2.0, 6.0, 4.0, 8.0)));
1885+
18841886
__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
18851887
// CHECK-LABEL: test_mm256_shuffle_ps
18861888
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
18871889
return _mm256_shuffle_ps(A, B, 0);
18881890
}
18891891

1892+
TEST_CONSTEXPR((match_m256(_mm256_shuffle_ps(((__m256)(__v8sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256)(__v8sf){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 4), 1.0f, 2.0f, 9.0f, 9.0f, 5.0f, 6.0f, 13.0f, 13.0f)));
1893+
18901894
__m256d test_mm256_sqrt_pd(__m256d A) {
18911895
// CHECK-LABEL: test_mm256_sqrt_pd
18921896
// CHECK: call {{.*}}<4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.*}})

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6735,9 +6735,13 @@ __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) {
67356735
// CHECK-LABEL: test_mm512_maskz_shuffle_ps
67366736
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
67376737
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
6738-
return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
6738+
return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
67396739
}
67406740

6741+
TEST_CONSTEXPR((match_m512(_mm512_shuffle_ps(((__m512)(__v16sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512)(__v16sf){17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f}), 4), 1.0f, 2.0f, 17.0f, 17.0f, 5.0f, 6.0f, 21.0f, 21.0f, 9.0f, 10.0f, 25.0f, 25.0f, 13.0f, 14.0f, 29.0f, 29.0f)));
6742+
TEST_CONSTEXPR((match_m512d(_mm512_shuffle_pd(((__m512d)(__v8df){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m512d)(__v8df){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), 48), 1.0, 9.0, 3.0, 11.0, 6.0, 14.0, 7.0, 15.0)));
6743+
TEST_CONSTEXPR((match_m512d(_mm512_maskz_shuffle_pd(0xFF, ((__m512d)(__v8df){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m512d)(__v8df){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), 48), 1.0, 9.0, 3.0, 11.0, 6.0, 14.0, 7.0, 15.0)));
6744+
67416745
__m128d test_mm_sqrt_round_sd(__m128d __A, __m128d __B) {
67426746
// CHECK-LABEL: test_mm_sqrt_round_sd
67436747
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 11)

clang/test/CodeGen/X86/avx512vl-builtins.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8933,9 +8933,14 @@ __m256 test_mm256_maskz_shuffle_ps(__mmask8 __U, __m256 __A, __m256 __B) {
89338933
// CHECK-LABEL: test_mm256_maskz_shuffle_ps
89348934
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
89358935
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
8936-
return _mm256_maskz_shuffle_ps(__U, __A, __B, 4);
8936+
return _mm256_maskz_shuffle_ps(__U, __A, __B, 4);
89378937
}
89388938

8939+
TEST_CONSTEXPR((match_m128d(_mm_maskz_shuffle_pd(0x3, ((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 3), 2.0, 4.0)));
8940+
TEST_CONSTEXPR((match_m256d(_mm256_maskz_shuffle_pd(0xF, ((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}), ((__m256d)(__v4df){5.0, 6.0, 7.0, 8.0}), 15), 2.0, 6.0, 4.0, 8.0)));
8941+
TEST_CONSTEXPR((match_m128(_mm_maskz_shuffle_ps(0xF, ((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 4), 1.0f, 2.0f, 5.0f, 5.0f)));
8942+
TEST_CONSTEXPR((match_m256(_mm256_maskz_shuffle_ps(0xFF, ((__m256)(__v8sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256)(__v8sf){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 4), 1.0f, 2.0f, 9.0f, 9.0f, 5.0f, 6.0f, 13.0f, 13.0f)));
8943+
89398944
__m128d test_mm_rsqrt14_pd(__m128d __A) {
89408945
// CHECK-LABEL: test_mm_rsqrt14_pd
89418946
// CHECK: @llvm.x86.avx512.rsqrt14.pd.128

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,11 @@ __m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
736736
return _mm_shuffle_ps(A, B, 0);
737737
}
738738

739+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 4), 1.0f, 2.0f, 5.0f, 5.0f)));
740+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 0), 1.0f, 1.0f, 5.0f, 5.0f)));
741+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 255), 4.0f, 4.0f, 8.0f, 8.0f)));
742+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 27), 4.0f, 3.0f, 6.0f, 5.0f)));
743+
739744
__m128 test_mm_sqrt_ps(__m128 x) {
740745
// CHECK-LABEL: test_mm_sqrt_ps
741746
// CHECK: call {{.*}}<4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1309,6 +1309,11 @@ __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
13091309
return _mm_shuffle_pd(A, B, 1);
13101310
}
13111311

1312+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 3), 2.0, 4.0)));
1313+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 0), 1.0, 3.0)));
1314+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 1), 2.0, 3.0)));
1315+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 2), 1.0, 4.0)));
1316+
13121317
__m128i test_mm_shufflehi_epi16(__m128i A) {
13131318
// CHECK-LABEL: test_mm_shufflehi_epi16
13141319
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>

0 commit comments

Comments
 (0)