Skip to content

Commit 50bcf68

Browse files
brandonxintbaederrRKSimon
authored
[X86][bytecode] Allow SSE/AVX BLEND imm intrinsics to be used in constexpr (#157776)
This marks the following builtins as constexpr, which allows their corresponding intrinsics to be used in constexprs. | Intrinsics | X86 Builtins | CPUID Flags | Header | | -------------------- | --------------------------- | ----------- | ----------- | | `_mm_blend_pd` | `__builtin_ia32_blendpd` | SSE4.1 | smmintrin.h | | `_mm256_blend_pd` | `__builtin_ia32_blendpd256` | AVX | immintrin.h | | `_mm_blend_ps` | `__builtin_ia32_blendps` | SSE4.1 | smmintrin.h | | `_mm256_blend_ps` | `__builtin_ia32_blendps256` | AVX | immintrin.h | | `_mm_blend_epi16` | `__builtin_ia32_pblendw128` | SSE4.1 | smmintrin.h | | `_mm256_blend_epi16` | `__builtin_ia32_pblendw256` | AVX2 | immintrin.h | | `_mm_blend_epi32` | `__builtin_ia32_pblendd128` | AVX2 | immintrin.h | | `_mm256_blend_epi32` | `__builtin_ia32_pblendd256` | AVX2 | immintrin.h | Fixes #157065 --------- Co-authored-by: Timm Baeder <[email protected]> Co-authored-by: Simon Pilgrim <[email protected]>
1 parent d8c8c67 commit 50bcf68

File tree

6 files changed

+112
-9
lines changed

6 files changed

+112
-9
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
312312

313313
let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
314314
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
315-
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
316-
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
317-
def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
318315
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
319316
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
320317
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
@@ -333,6 +330,9 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
333330
}
334331

335332
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
333+
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
334+
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
335+
def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
336336
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
337337
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
338338
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
@@ -469,8 +469,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
469469
def vpermilvarps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>)">;
470470
def vpermilvarpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>)">;
471471
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
472-
def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
473-
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
474472
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
475473
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
476474
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -495,6 +493,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
495493
}
496494

497495
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
496+
def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
497+
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
498498
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
499499
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
500500
}
@@ -575,7 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
575575
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
576576
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
577577
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
578-
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
579578
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
580579
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
581580
def phaddsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
@@ -604,8 +603,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
604603
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
605604
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
606605
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
607-
def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
608-
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
609606
def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
610607
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
611608
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
@@ -619,6 +616,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
619616
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
620617
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
621618

619+
def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
620+
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
621+
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
622+
622623
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
623624

624625
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2837,6 +2837,40 @@ static bool interp__builtin_select(InterpState &S, CodePtr OpPC,
28372837
return true;
28382838
}
28392839

2840+
static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
2841+
const CallExpr *Call) {
2842+
PrimType MaskT = *S.getContext().classify(Call->getArg(2));
2843+
APSInt Mask = popToAPSInt(S.Stk, MaskT);
2844+
const Pointer &TrueVec = S.Stk.pop<Pointer>();
2845+
const Pointer &FalseVec = S.Stk.pop<Pointer>();
2846+
const Pointer &Dst = S.Stk.peek<Pointer>();
2847+
2848+
assert(FalseVec.getNumElems() == TrueVec.getNumElems());
2849+
assert(FalseVec.getNumElems() == Dst.getNumElems());
2850+
unsigned NumElems = FalseVec.getNumElems();
2851+
PrimType ElemT = FalseVec.getFieldDesc()->getPrimType();
2852+
PrimType DstElemT = Dst.getFieldDesc()->getPrimType();
2853+
2854+
for (unsigned I = 0; I != NumElems; ++I) {
2855+
bool MaskBit = Mask[I % 8];
2856+
if (ElemT == PT_Float) {
2857+
assert(DstElemT == PT_Float);
2858+
Dst.elem<Floating>(I) =
2859+
MaskBit ? TrueVec.elem<Floating>(I) : FalseVec.elem<Floating>(I);
2860+
} else {
2861+
assert(DstElemT == ElemT);
2862+
INT_TYPE_SWITCH_NO_BOOL(DstElemT, {
2863+
Dst.elem<T>(I) =
2864+
static_cast<T>(MaskBit ? TrueVec.elem<T>(I).toAPSInt()
2865+
: FalseVec.elem<T>(I).toAPSInt());
2866+
});
2867+
}
2868+
}
2869+
Dst.initializeAllElements();
2870+
2871+
return true;
2872+
}
2873+
28402874
static bool interp__builtin_elementwise_triop(
28412875
InterpState &S, CodePtr OpPC, const CallExpr *Call,
28422876
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3502,6 +3536,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
35023536
return llvm::APIntOps::fshr(Hi, Lo, Amt);
35033537
});
35043538

3539+
case clang::X86::BI__builtin_ia32_blendpd:
3540+
case clang::X86::BI__builtin_ia32_blendpd256:
3541+
case clang::X86::BI__builtin_ia32_blendps:
3542+
case clang::X86::BI__builtin_ia32_blendps256:
3543+
case clang::X86::BI__builtin_ia32_pblendw128:
3544+
case clang::X86::BI__builtin_ia32_pblendw256:
3545+
case clang::X86::BI__builtin_ia32_pblendd128:
3546+
case clang::X86::BI__builtin_ia32_pblendd256:
3547+
return interp__builtin_blend(S, OpPC, Call);
3548+
35053549
case clang::X86::BI__builtin_ia32_blendvpd:
35063550
case clang::X86::BI__builtin_ia32_blendvpd256:
35073551
case clang::X86::BI__builtin_ia32_blendvps:

clang/lib/AST/ExprConstant.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11926,6 +11926,33 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1192611926

1192711927
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1192811928
}
11929+
case X86::BI__builtin_ia32_blendpd:
11930+
case X86::BI__builtin_ia32_blendpd256:
11931+
case X86::BI__builtin_ia32_blendps:
11932+
case X86::BI__builtin_ia32_blendps256:
11933+
case X86::BI__builtin_ia32_pblendw128:
11934+
case X86::BI__builtin_ia32_pblendw256:
11935+
case X86::BI__builtin_ia32_pblendd128:
11936+
case X86::BI__builtin_ia32_pblendd256: {
11937+
APValue SourceF, SourceT, SourceC;
11938+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceF) ||
11939+
!EvaluateAsRValue(Info, E->getArg(1), SourceT) ||
11940+
!EvaluateAsRValue(Info, E->getArg(2), SourceC))
11941+
return false;
11942+
11943+
const APInt &C = SourceC.getInt();
11944+
unsigned SourceLen = SourceF.getVectorLength();
11945+
SmallVector<APValue, 32> ResultElements;
11946+
ResultElements.reserve(SourceLen);
11947+
for (unsigned EltNum = 0; EltNum != SourceLen; ++EltNum) {
11948+
const APValue &F = SourceF.getVectorElt(EltNum);
11949+
const APValue &T = SourceT.getVectorElt(EltNum);
11950+
ResultElements.push_back(C[EltNum % 8] ? T : F);
11951+
}
11952+
11953+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
11954+
}
11955+
1192911956
case X86::BI__builtin_ia32_blendvpd:
1193011957
case X86::BI__builtin_ia32_blendvpd256:
1193111958
case X86::BI__builtin_ia32_blendvps:

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,20 @@ __m256d test_mm256_blend_pd(__m256d A, __m256d B) {
8787
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
8888
return _mm256_blend_pd(A, B, 0x05);
8989
}
90+
TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x00), 1.0, 2.0, 3.0, 4.0));
91+
TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x05), 5.0, 2.0, 7.0, 4.0));
92+
TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0A), 1.0, 6.0, 3.0, 8.0));
93+
TEST_CONSTEXPR(match_m256d(_mm256_blend_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m256d){5.0, 6.0, 7.0, 8.0}), 0x0F), 5.0, 6.0, 7.0, 8.0));
9094

9195
__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
9296
// CHECK-LABEL: test_mm256_blend_ps
9397
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
9498
return _mm256_blend_ps(A, B, 0x35);
9599
}
100+
TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x00), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
101+
TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0x35), -1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, 8.0f));
102+
TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xAA), 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f));
103+
TEST_CONSTEXPR(match_m256(_mm256_blend_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m256){-1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f}), 0xFF), -1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f));
96104

97105
__m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) {
98106
// CHECK-LABEL: test_mm256_blendv_pd

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,20 +146,32 @@ __m256i test_mm256_blend_epi16(__m256i a, __m256i b) {
146146
// CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i32> <i32 0, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
147147
return _mm256_blend_epi16(a, b, 2);
148148
}
149+
TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x00), 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16));
150+
TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x5A), 1,-2,3,-4,-5,6,-7,8,9,-10,11,-12,-13,14,-15,16));
151+
TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0x94), 1,2,-3,4,-5,6,7,-8,9,10,-11,12,-13,14,15,-16));
152+
TEST_CONSTEXPR(match_v16hi(_mm256_blend_epi16(((__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v16hi){-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16));
149153

150154
__m128i test_mm_blend_epi32(__m128i a, __m128i b) {
151155
// CHECK-LABEL: test_mm_blend_epi32
152156
// CHECK-NOT: @llvm.x86.avx2.pblendd.128
153157
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
154158
return _mm_blend_epi32(a, b, 0x05);
155159
}
160+
TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x0), 1,2,3,4));
161+
TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0x5), -1,2,-3,4));
162+
TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xA), 1,-2,3,-4));
163+
TEST_CONSTEXPR(match_v4si(_mm_blend_epi32(((__m128i)(__v4si){1,2,3,4}), ((__m128i)(__v4si){-1,-2,-3,-4}), 0xF), -1,-2,-3,-4));
156164

157165
__m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
158166
// CHECK-LABEL: test_mm256_blend_epi32
159167
// CHECK-NOT: @llvm.x86.avx2.pblendd.256
160168
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 13, i32 6, i32 7>
161169
return _mm256_blend_epi32(a, b, 0x35);
162170
}
171+
TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x00), 1,2,3,4,5,6,7,8));
172+
TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xA5), -1,2,-3,4,5,-6,7,-8));
173+
TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0x94), 1,2,-3,4,-5,6,7,-8));
174+
TEST_CONSTEXPR(match_v8si(_mm256_blend_epi32(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m256i)(__v8si){-1,-2,-3,-4,-5,-6,-7,-8}), 0xFF), -1,-2,-3,-4,-5,-6,-7,-8));
163175

164176
__m256i test_mm256_blendv_epi8(__m256i a, __m256i b, __m256i m) {
165177
// CHECK-LABEL: test_mm256_blendv_epi8

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,30 @@ __m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
2727
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
2828
return _mm_blend_epi16(V1, V2, 42);
2929
}
30+
TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x00),1,2,3,4,5,6,7,8));
31+
TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x5A),1,-2,3,-4,-5,6,-7,8));
32+
TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0x94),1,2,-3,4,-5,6,7,-8));
33+
TEST_CONSTEXPR(match_v8hi(_mm_blend_epi16(((__m128i)(__v8hi){1,2,3,4,5,6,7,8}),((__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,-7,-8}),0xFF),-1,-2,-3,-4,-5,-6,-7,-8));
3034

3135
__m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
3236
// CHECK-LABEL: test_mm_blend_pd
3337
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
3438
return _mm_blend_pd(V1, V2, 2);
3539
}
40+
TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 0), 1.0, 2.0));
41+
TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 1), 3.0, 2.0));
42+
TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 2), 1.0, 4.0));
43+
TEST_CONSTEXPR(match_m128d(_mm_blend_pd(((__m128d){1.0, 2.0}), ((__m128d){3.0, 4.0}), 3), 3.0, 4.0));
3644

3745
__m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
3846
// CHECK-LABEL: test_mm_blend_ps
3947
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
4048
return _mm_blend_ps(V1, V2, 6);
4149
}
50+
TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x0), 1.0f, 2.0f, 3.0f, 4.0f));
51+
TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0x5), 5.0f, 2.0f, 7.0f, 4.0f));
52+
TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xA), 1.0f, 6.0f, 3.0f, 8.0f));
53+
TEST_CONSTEXPR(match_m128(_mm_blend_ps(((__m128){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128){5.0f, 6.0f, 7.0f, 8.0f}), 0xF), 5.0f, 6.0f, 7.0f, 8.0f));
4254

4355
__m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
4456
// CHECK-LABEL: test_mm_blendv_epi8
@@ -459,4 +471,3 @@ int test_mm_testz_si128(__m128i x, __m128i y) {
459471
// CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
460472
return _mm_testz_si128(x, y);
461473
}
462-

0 commit comments

Comments
 (0)