Skip to content

Commit 1f22c12

Browse files
committed
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow shufps/pd shuffle intrinsics to be used in constexpr
* A generic shuffle helper function is introduced to reduce code duplication and facilitate future extensions to other shuffle intrinsics. Resolves #161208
1 parent 11a24d6 commit 1f22c12

File tree

8 files changed

+226
-7
lines changed

8 files changed

+226
-7
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,17 @@ let Features = "sse", Header = "xmmintrin.h", Attributes = [NoThrow, RequireDecl
198198
def _mm_sfence : X86LibBuiltin<"void()">;
199199
}
200200

201+
let Features = "sse", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
202+
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
203+
}
204+
201205
let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
202206
def rcpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
203207
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
204208
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
205209
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
206210
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
207211
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
208-
def shufps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
209212
}
210213

211214
let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
@@ -222,13 +225,13 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
222225
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
223226
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
224227
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
228+
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
225229
}
226230

227231
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
228232
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
229233
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
230234
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
231-
def shufpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
232235
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
233236
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
234237
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
@@ -487,13 +490,16 @@ let Features = "avx512f,vpclmulqdq", Attributes = [NoThrow, Const, RequiredVecto
487490
def pclmulqdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant char)">;
488491
}
489492

493+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
494+
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
495+
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
496+
}
497+
490498
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
491499
def vpermilvarpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>)">;
492500
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
493501
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
494502
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
495-
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
496-
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
497503
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
498504
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
499505
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -2463,6 +2469,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
24632469
def shuf_f64x2 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
24642470
def shuf_i32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
24652471
def shuf_i64x2 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
2472+
}
2473+
2474+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
24662475
def shufpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
24672476
def shufps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, float>, _Constant int)">;
24682477
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3279,6 +3279,65 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
32793279
return true;
32803280
}
32813281

3282+
static bool interp__builtin_ia32_shuffle_generic(
3283+
InterpState &S, CodePtr OpPC, const CallExpr *Call,
3284+
llvm::function_ref<std::pair<unsigned, unsigned>(unsigned, unsigned,
3285+
unsigned)>
3286+
GetSourceIndex,
3287+
bool IsSingleSrc) {
3288+
3289+
assert(Call->getNumArgs() == (IsSingleSrc ? 2 : 3));
3290+
QualType MaskType = Call->getArg(IsSingleSrc ? 1 : 2)->getType();
3291+
bool IsMaskVector = MaskType->isVectorType();
3292+
3293+
unsigned ShuffleMask = 0;
3294+
Pointer MaskPtr;
3295+
if (IsMaskVector) {
3296+
MaskPtr = S.Stk.pop<Pointer>();
3297+
} else {
3298+
ShuffleMask =
3299+
popToAPSInt(S, Call->getArg(IsSingleSrc ? 1 : 2)).getZExtValue();
3300+
}
3301+
3302+
QualType Arg0Type = Call->getArg(0)->getType();
3303+
const auto *VecT = Arg0Type->castAs<VectorType>();
3304+
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
3305+
unsigned NumElems = VecT->getNumElements();
3306+
unsigned ElemWidth = S.getContext().getBitWidth(VecT->getElementType());
3307+
3308+
Pointer Aptr;
3309+
Pointer Bptr;
3310+
if (IsSingleSrc) {
3311+
Aptr = S.Stk.pop<Pointer>();
3312+
Bptr = Aptr;
3313+
} else {
3314+
Bptr = S.Stk.pop<Pointer>();
3315+
Aptr = S.Stk.pop<Pointer>();
3316+
}
3317+
3318+
const Pointer &A = Aptr;
3319+
const Pointer &B = Bptr;
3320+
const Pointer &Dst = S.Stk.peek<Pointer>();
3321+
3322+
for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
3323+
if (IsMaskVector) {
3324+
uint8_t Control = static_cast<uint8_t>(MaskPtr.elem<uint8_t>(DstIdx));
3325+
if (Control & 0x80) {
3326+
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = T(); });
3327+
continue;
3328+
}
3329+
ShuffleMask = Control;
3330+
}
3331+
3332+
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask, ElemWidth);
3333+
const Pointer &Src = (SrcVecIdx == 0) ? A : B;
3334+
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
3335+
}
3336+
Dst.initializeAllElements();
3337+
3338+
return true;
3339+
}
3340+
32823341
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
32833342
uint32_t BuiltinID) {
32843343
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4191,6 +4250,31 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
41914250
case X86::BI__builtin_ia32_selectpd_512:
41924251
return interp__builtin_select(S, OpPC, Call);
41934252

4253+
case X86::BI__builtin_ia32_shufps:
4254+
case X86::BI__builtin_ia32_shufps256:
4255+
case X86::BI__builtin_ia32_shufps512:
4256+
case X86::BI__builtin_ia32_shufpd:
4257+
case X86::BI__builtin_ia32_shufpd256:
4258+
case X86::BI__builtin_ia32_shufpd512:
4259+
return interp__builtin_ia32_shuffle_generic(
4260+
S, OpPC, Call,
4261+
[](unsigned DstIdx, unsigned ShuffleMask, unsigned ElementSize) {
4262+
unsigned NumElemPerLane = 128 / ElementSize;
4263+
unsigned NumSelectableElems = NumElemPerLane / 2;
4264+
unsigned BitsPerElem = NumSelectableElems == 1 ? 1 : 2;
4265+
unsigned IndexMask = BitsPerElem == 2 ? 0x3 : 0x1;
4266+
unsigned MaskBits = 8;
4267+
4268+
unsigned Lane = DstIdx / NumElemPerLane;
4269+
unsigned ElemInLane = DstIdx % NumElemPerLane;
4270+
unsigned LaneOffset = Lane * NumElemPerLane;
4271+
unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0;
4272+
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
4273+
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
4274+
return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
4275+
},
4276+
false);
4277+
41944278
case X86::BI__builtin_ia32_pshufb128:
41954279
case X86::BI__builtin_ia32_pshufb256:
41964280
case X86::BI__builtin_ia32_pshufb512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11618,6 +11618,99 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161811618
return true;
1161911619
}
1162011620

11621+
static bool evalShuffleGeneric(EvalInfo &Info, const CallExpr *Call,
11622+
APValue &Out,
11623+
llvm::function_ref<std::pair<unsigned, unsigned>(
11624+
unsigned, unsigned, unsigned)>
11625+
GetSourceIndex,
11626+
bool IsSingleSrc) {
11627+
11628+
const auto *VT = Call->getType()->getAs<VectorType>();
11629+
if (!VT)
11630+
return false;
11631+
11632+
QualType MaskType = Call->getArg(IsSingleSrc ? 1 : 2)->getType();
11633+
bool IsMaskVector = MaskType->isVectorType();
11634+
11635+
unsigned ShuffleMask = 0;
11636+
APValue MaskVec;
11637+
if (IsMaskVector) {
11638+
if (!EvaluateAsRValue(Info, Call->getArg(IsSingleSrc ? 1 : 2), MaskVec))
11639+
return false;
11640+
} else {
11641+
APSInt MaskImm;
11642+
if (!EvaluateInteger(Call->getArg(IsSingleSrc ? 1 : 2), MaskImm, Info))
11643+
return false;
11644+
ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
11645+
}
11646+
11647+
APValue A, B;
11648+
if (IsSingleSrc) {
11649+
if (!EvaluateAsRValue(Info, Call->getArg(0), A))
11650+
return false;
11651+
B = A;
11652+
} else {
11653+
if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
11654+
!EvaluateAsRValue(Info, Call->getArg(1), B))
11655+
return false;
11656+
}
11657+
11658+
QualType ElemT = VT->getElementType();
11659+
unsigned ElemBits = Info.Ctx.getTypeSize(ElemT);
11660+
unsigned NumElts = VT->getNumElements();
11661+
11662+
SmallVector<APValue, 16> ResultElements;
11663+
ResultElements.reserve(NumElts);
11664+
11665+
for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
11666+
if (IsMaskVector) {
11667+
APValue CtlVal = MaskVec.getVectorElt(DstIdx);
11668+
APSInt CtlByte = CtlVal.getInt();
11669+
uint8_t Control = static_cast<uint8_t>(CtlByte.getZExtValue());
11670+
11671+
if (Control & 0x80) {
11672+
APValue Zero(Info.Ctx.MakeIntValue(0, ElemT));
11673+
ResultElements.push_back(Zero);
11674+
continue;
11675+
}
11676+
ShuffleMask = Control;
11677+
}
11678+
11679+
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask, ElemBits);
11680+
const APValue &Src = (SrcVecIdx == 0) ? A : B;
11681+
ResultElements.push_back(Src.getVectorElt(SrcIdx));
11682+
}
11683+
11684+
Out = APValue(ResultElements.data(), ResultElements.size());
11685+
return true;
11686+
}
11687+
11688+
static bool evalShufpspdBuiltin(EvalInfo &Info, const CallExpr *Call,
11689+
APValue &Out) {
11690+
return evalShuffleGeneric(
11691+
Info, Call, Out,
11692+
[](unsigned DstIdx, unsigned ShuffleMask,
11693+
unsigned ElementSize) -> std::pair<unsigned, unsigned> {
11694+
constexpr unsigned LaneBits = 128u;
11695+
unsigned NumElemPerLane = LaneBits / ElementSize;
11696+
unsigned NumSelectableElems = NumElemPerLane / 2;
11697+
unsigned BitsPerElem = NumSelectableElems == 1 ? 1 : 2;
11698+
unsigned IndexMask = (1u << BitsPerElem) - 1;
11699+
unsigned MaskBits = 8;
11700+
11701+
unsigned Lane = DstIdx / NumElemPerLane;
11702+
unsigned ElemInLane = DstIdx % NumElemPerLane;
11703+
unsigned LaneOffset = Lane * NumElemPerLane;
11704+
11705+
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
11706+
unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1;
11707+
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
11708+
11709+
return {SrcIdx, LaneOffset + Index};
11710+
},
11711+
false);
11712+
}
11713+
1162111714
static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
1162211715
APValue &Out) {
1162311716
APValue SrcVec, ControlVec;
@@ -12383,7 +12476,17 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1238312476

1238412477
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1238512478
}
12386-
12479+
case X86::BI__builtin_ia32_shufps:
12480+
case X86::BI__builtin_ia32_shufps256:
12481+
case X86::BI__builtin_ia32_shufps512:
12482+
case X86::BI__builtin_ia32_shufpd:
12483+
case X86::BI__builtin_ia32_shufpd256:
12484+
case X86::BI__builtin_ia32_shufpd512: {
12485+
APValue R;
12486+
if (!evalShufpspdBuiltin(Info, E, R))
12487+
return false;
12488+
return Success(R, E);
12489+
}
1238712490
case X86::BI__builtin_ia32_pshufb128:
1238812491
case X86::BI__builtin_ia32_pshufb256:
1238912492
case X86::BI__builtin_ia32_pshufb512: {

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,12 +1891,16 @@ __m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
18911891
return _mm256_shuffle_pd(A, B, 0);
18921892
}
18931893

1894+
TEST_CONSTEXPR((match_m256d(_mm256_shuffle_pd(((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}), ((__m256d)(__v4df){5.0, 6.0, 7.0, 8.0}), 15), 2.0, 6.0, 4.0, 8.0)));
1895+
18941896
__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
18951897
// CHECK-LABEL: test_mm256_shuffle_ps
18961898
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
18971899
return _mm256_shuffle_ps(A, B, 0);
18981900
}
18991901

1902+
TEST_CONSTEXPR((match_m256(_mm256_shuffle_ps(((__m256)(__v8sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256)(__v8sf){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 4), 1.0f, 2.0f, 9.0f, 9.0f, 5.0f, 6.0f, 13.0f, 13.0f)));
1903+
19001904
__m256d test_mm256_sqrt_pd(__m256d A) {
19011905
// CHECK-LABEL: test_mm256_sqrt_pd
19021906
// CHECK: call {{.*}}<4 x double> @llvm.sqrt.v4f64(<4 x double> %{{.*}})

clang/test/CodeGen/X86/avx512f-builtins.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6741,9 +6741,13 @@ __m512 test_mm512_maskz_shuffle_ps(__mmask16 __U, __m512 __M, __m512 __V) {
67416741
// CHECK-LABEL: test_mm512_maskz_shuffle_ps
67426742
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
67436743
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
6744-
return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
6744+
return _mm512_maskz_shuffle_ps(__U, __M, __V, 4);
67456745
}
67466746

6747+
TEST_CONSTEXPR((match_m512(_mm512_shuffle_ps(((__m512)(__v16sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m512)(__v16sf){17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f}), 4), 1.0f, 2.0f, 17.0f, 17.0f, 5.0f, 6.0f, 21.0f, 21.0f, 9.0f, 10.0f, 25.0f, 25.0f, 13.0f, 14.0f, 29.0f, 29.0f)));
6748+
TEST_CONSTEXPR((match_m512d(_mm512_shuffle_pd(((__m512d)(__v8df){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m512d)(__v8df){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), 48), 1.0, 9.0, 3.0, 11.0, 6.0, 14.0, 7.0, 15.0)));
6749+
TEST_CONSTEXPR((match_m512d(_mm512_maskz_shuffle_pd(0xFF, ((__m512d)(__v8df){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m512d)(__v8df){9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0}), 48), 1.0, 9.0, 3.0, 11.0, 6.0, 14.0, 7.0, 15.0)));
6750+
67476751
__m128d test_mm_sqrt_round_sd(__m128d __A, __m128d __B) {
67486752
// CHECK-LABEL: test_mm_sqrt_round_sd
67496753
// CHECK: call {{.*}}<2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 -1, i32 11)

clang/test/CodeGen/X86/avx512vl-builtins.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8933,9 +8933,14 @@ __m256 test_mm256_maskz_shuffle_ps(__mmask8 __U, __m256 __A, __m256 __B) {
89338933
// CHECK-LABEL: test_mm256_maskz_shuffle_ps
89348934
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 8, i32 4, i32 5, i32 12, i32 12>
89358935
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
8936-
return _mm256_maskz_shuffle_ps(__U, __A, __B, 4);
8936+
return _mm256_maskz_shuffle_ps(__U, __A, __B, 4);
89378937
}
89388938

8939+
TEST_CONSTEXPR((match_m128d(_mm_maskz_shuffle_pd(0x3, ((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 3), 2.0, 4.0)));
8940+
TEST_CONSTEXPR((match_m256d(_mm256_maskz_shuffle_pd(0xF, ((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}), ((__m256d)(__v4df){5.0, 6.0, 7.0, 8.0}), 15), 2.0, 6.0, 4.0, 8.0)));
8941+
TEST_CONSTEXPR((match_m128(_mm_maskz_shuffle_ps(0xF, ((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 4), 1.0f, 2.0f, 5.0f, 5.0f)));
8942+
TEST_CONSTEXPR((match_m256(_mm256_maskz_shuffle_ps(0xFF, ((__m256)(__v8sf){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256)(__v8sf){9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), 4), 1.0f, 2.0f, 9.0f, 9.0f, 5.0f, 6.0f, 13.0f, 13.0f)));
8943+
89398944
__m128d test_mm_rsqrt14_pd(__m128d __A) {
89408945
// CHECK-LABEL: test_mm_rsqrt14_pd
89418946
// CHECK: @llvm.x86.avx512.rsqrt14.pd.128

clang/test/CodeGen/X86/sse-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,11 @@ __m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
738738
return _mm_shuffle_ps(A, B, 0);
739739
}
740740

741+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 4), 1.0f, 2.0f, 5.0f, 5.0f)));
742+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 0), 1.0f, 1.0f, 5.0f, 5.0f)));
743+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 255), 4.0f, 4.0f, 8.0f, 8.0f)));
744+
TEST_CONSTEXPR((match_m128(_mm_shuffle_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){5.0f, 6.0f, 7.0f, 8.0f}), 27), 4.0f, 3.0f, 6.0f, 5.0f)));
745+
741746
__m128 test_mm_sqrt_ps(__m128 x) {
742747
// CHECK-LABEL: test_mm_sqrt_ps
743748
// CHECK: call {{.*}}<4 x float> @llvm.sqrt.v4f32(<4 x float> {{.*}})

clang/test/CodeGen/X86/sse2-builtins.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,6 +1314,11 @@ __m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
13141314
return _mm_shuffle_pd(A, B, 1);
13151315
}
13161316

1317+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 3), 2.0, 4.0)));
1318+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 0), 1.0, 3.0)));
1319+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 1), 2.0, 3.0)));
1320+
TEST_CONSTEXPR((match_m128d(_mm_shuffle_pd(((__m128d)(__v2df){1.0, 2.0}), ((__m128d)(__v2df){3.0, 4.0}), 2), 1.0, 4.0)));
1321+
13171322
__m128i test_mm_shufflehi_epi16(__m128i A) {
13181323
// CHECK-LABEL: test_mm_shufflehi_epi16
13191324
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>

0 commit comments

Comments
 (0)