Skip to content

Commit 9b24a8a

Browse files
authored
[X86][bytecode] Allow SSE/AVX PBLENDVB intrinsics to be used in constexpr (#157100)
BLENDV intrinsics use the signbit of the condition mask to select between the LHS (false) and RHS (true) operands First part of #157066 - the BLENDVPS/D requires floatbits hacking which I need to do some prep work for
1 parent 0532ac4 commit 9b24a8a

File tree

7 files changed

+38
-8
lines changed

7 files changed

+38
-8
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
312312

313313
let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
314314
def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
315-
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
316315
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
317316
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
318317
def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
@@ -336,6 +335,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
336335
}
337336

338337
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
338+
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
339339
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
340340
}
341341

@@ -573,7 +573,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
573573
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
574574
def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
575575
def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
576-
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
577576
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
578577
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
579578
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -615,6 +614,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
615614
}
616615

617616
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
617+
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
618+
618619
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
619620
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
620621

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3412,6 +3412,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34123412
case Builtin::BI__builtin_elementwise_fma:
34133413
return interp__builtin_elementwise_fma(S, OpPC, Call);
34143414

3415+
case clang::X86::BI__builtin_ia32_pblendvb128:
3416+
case clang::X86::BI__builtin_ia32_pblendvb256:
3417+
return interp__builtin_elementwise_triop(
3418+
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
3419+
return ((APInt)C).isNegative() ? T : F;
3420+
});
3421+
34153422
case X86::BI__builtin_ia32_selectb_128:
34163423
case X86::BI__builtin_ia32_selectb_256:
34173424
case X86::BI__builtin_ia32_selectb_512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11995,6 +11995,28 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1199511995

1199611996
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1199711997
}
11998+
case X86::BI__builtin_ia32_pblendvb128:
11999+
case X86::BI__builtin_ia32_pblendvb256: {
12000+
// SSE blendv by mask signbit: "Result = C[] < 0 ? T[] : F[]".
12001+
APValue SourceF, SourceT, SourceC;
12002+
if (!EvaluateAsRValue(Info, E->getArg(0), SourceF) ||
12003+
!EvaluateAsRValue(Info, E->getArg(1), SourceT) ||
12004+
!EvaluateAsRValue(Info, E->getArg(2), SourceC))
12005+
return false;
12006+
12007+
unsigned SourceLen = SourceF.getVectorLength();
12008+
SmallVector<APValue, 32> ResultElements;
12009+
ResultElements.reserve(SourceLen);
12010+
12011+
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
12012+
const APValue &F = SourceF.getVectorElt(EltNum);
12013+
const APValue &T = SourceT.getVectorElt(EltNum);
12014+
APInt C = SourceC.getVectorElt(EltNum).getInt();
12015+
ResultElements.push_back(C.isNegative() ? T : F);
12016+
}
12017+
12018+
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
12019+
}
1199812020
case X86::BI__builtin_ia32_selectb_128:
1199912021
case X86::BI__builtin_ia32_selectb_256:
1200012022
case X86::BI__builtin_ia32_selectb_512:

clang/lib/Headers/avx2intrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,8 @@ _mm256_avg_epu16(__m256i __a, __m256i __b)
557557
/// is 0, the byte is copied from \a __V1; otherwise, it is copied from
558558
/// \a __V2.
559559
/// \returns A 256-bit integer vector containing the result.
560-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
561-
_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
562-
{
560+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
561+
_mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) {
563562
return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
564563
(__v32qi)__M);
565564
}

clang/lib/Headers/smmintrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
493493
/// position in the result. When a mask bit is 1, the corresponding 8-bit
494494
/// element in operand \a __V2 is copied to the same position in the result.
495495
/// \returns A 128-bit vector of [16 x i8] containing the copied values.
496-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_blendv_epi8(__m128i __V1,
497-
__m128i __V2,
498-
__m128i __M) {
496+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
497+
_mm_blendv_epi8(__m128i __V1, __m128i __V2, __m128i __M) {
499498
return (__m128i)__builtin_ia32_pblendvb128((__v16qi)__V1, (__v16qi)__V2,
500499
(__v16qi)__M);
501500
}

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ __m256i test_mm256_blendv_epi8(__m256i a, __m256i b, __m256i m) {
164164
// CHECK: call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}})
165165
return _mm256_blendv_epi8(a, b, m);
166166
}
167+
TEST_CONSTEXPR(match_v32qi(_mm256_blendv_epi8((__m256i)(__v32qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31},(__m256i)(__v32qs){-90,-91,-92,-93,-94,-95,-96,-97,-98,-99,-100,-101,-12,-13,-104,-105,-106,-107,-108,-109,-100,-101,-12,-13,-104,-105,-106,-107,-108,-109,-120,-121},(__m256i)(__v32qs){0,0,0,-1,0,-1,-1,0,0,0,-1,-1,0,-1,0,0,0,0,0,0,0,0,0,-1,-1,-1,0,0,0,0,0,-1}), 0, 1, 2, -93, 4, -95, -96, 7, 8, 9, -100, -101, 12, -13, 14, 15, 16, 17, 18, 19, 20, 21, 22, -13, -104, -105, 26, 27, 28, 29, 30, -121));
167168

168169
__m128i test_mm_broadcastb_epi8(__m128i a) {
169170
// CHECK-LABEL: test_mm_broadcastb_epi8

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
4545
// CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
4646
return _mm_blendv_epi8(V1, V2, V3);
4747
}
48+
TEST_CONSTEXPR(match_v16qi(_mm_blendv_epi8((__m128i)(__v16qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15},(__m128i)(__v16qs){-99,-98,97,-96,-95,-94,-93,-92,-91,-90,-89,-88,-87,-86,-85,-84},(__m128i)(__v16qs){-1,-1,0,-1,0,0,0,0,0,-1,-1,-1,0,0,-1,0}), -99, -98, 2, -96, 4, 5, 6, 7, 8, -90, -89, -88, 12, 13, -85, 15));
4849

4950
__m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
5051
// CHECK-LABEL: test_mm_blendv_pd

0 commit comments

Comments
 (0)