Skip to content

Commit 13d50c2

Browse files
authored
[X86][bytecode] Allow SSE/AVX BLENDVPD/PS intrinsics to be used in constexpr (#157126)
BLENDV intrinsics use the sign bit of each condition-mask element to select between the LHS (false) and RHS (true) operands. Fixes #157066.
1 parent 719b92d commit 13d50c2

File tree

7 files changed

+36
-18
lines changed

7 files changed

+36
-18
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,6 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
315315
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
316316
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
317317
def blendps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
318-
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
319-
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
320318
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
321319
def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
322320
def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">;
@@ -335,7 +333,10 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
335333
}
336334

337335
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
336+
def blendvpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Vector<2, double>)">;
337+
def blendvps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>)">;
338338
def pblendvb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
339+
339340
def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
340341
}
341342

@@ -470,8 +471,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
470471
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
471472
def blendpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
472473
def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
473-
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
474-
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
475474
def shufpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
476475
def shufps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
477476
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
@@ -495,6 +494,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
495494
def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
496495
}
497496

497+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
498+
def blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">;
499+
def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">;
500+
}
501+
498502
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
499503
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
500504
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3421,6 +3421,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34213421
return F;
34223422
});
34233423

3424+
case clang::X86::BI__builtin_ia32_blendvpd:
3425+
case clang::X86::BI__builtin_ia32_blendvpd256:
3426+
case clang::X86::BI__builtin_ia32_blendvps:
3427+
case clang::X86::BI__builtin_ia32_blendvps256:
3428+
return interp__builtin_elementwise_triop_fp(
3429+
S, OpPC, Call,
3430+
[](const APFloat &F, const APFloat &T, const APFloat &C,
3431+
llvm::RoundingMode) { return C.isNegative() ? T : F; });
3432+
34243433
case clang::X86::BI__builtin_ia32_pblendvb128:
34253434
case clang::X86::BI__builtin_ia32_pblendvb256:
34263435
return interp__builtin_elementwise_triop(

clang/lib/AST/ExprConstant.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11995,6 +11995,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1199511995

1199611996
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1199711997
}
11998+
case X86::BI__builtin_ia32_blendvpd:
11999+
case X86::BI__builtin_ia32_blendvpd256:
12000+
case X86::BI__builtin_ia32_blendvps:
12001+
case X86::BI__builtin_ia32_blendvps256:
1199812002
case X86::BI__builtin_ia32_pblendvb128:
1199912003
case X86::BI__builtin_ia32_pblendvb256: {
1200012004
// SSE blendv by mask signbit: "Result = signbit(C[]) ? T[] : F[]".
@@ -12011,8 +12015,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1201112015
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
1201212016
const APValue &F = SourceF.getVectorElt(EltNum);
1201312017
const APValue &T = SourceT.getVectorElt(EltNum);
12014-
APInt C = SourceC.getVectorElt(EltNum).getInt();
12015-
ResultElements.push_back(C.isNegative() ? T : F);
12018+
const APValue &C = SourceC.getVectorElt(EltNum);
12019+
APInt M = C.isInt() ? (APInt)C.getInt() : C.getFloat().bitcastToAPInt();
12020+
ResultElements.push_back(M.isNegative() ? T : F);
1201612021
}
1201712022

1201812023
return Success(APValue(ResultElements.data(), ResultElements.size()), E);

clang/lib/Headers/avxintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,9 +1402,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
14021402
/// 64-bit element in operand \a __b is copied to the same position in the
14031403
/// destination.
14041404
/// \returns A 256-bit vector of [4 x double] containing the copied values.
1405-
static __inline __m256d __DEFAULT_FN_ATTRS
1406-
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
1407-
{
1405+
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
1406+
_mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c) {
14081407
return (__m256d)__builtin_ia32_blendvpd256(
14091408
(__v4df)__a, (__v4df)__b, (__v4df)__c);
14101409
}
@@ -1430,9 +1429,8 @@ _mm256_blendv_pd(__m256d __a, __m256d __b, __m256d __c)
14301429
/// corresponding 32-bit element in operand \a __b is copied to the same
14311430
/// position in the destination.
14321431
/// \returns A 256-bit vector of [8 x float] containing the copied values.
1433-
static __inline __m256 __DEFAULT_FN_ATTRS
1434-
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
1435-
{
1432+
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
1433+
_mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c) {
14361434
return (__m256)__builtin_ia32_blendvps256(
14371435
(__v8sf)__a, (__v8sf)__b, (__v8sf)__c);
14381436
}

clang/lib/Headers/smmintrin.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,9 +439,8 @@
439439
/// position in the result. When a mask bit is 1, the corresponding 64-bit
440440
/// element in operand \a __V2 is copied to the same position in the result.
441441
/// \returns A 128-bit vector of [2 x double] containing the copied values.
442-
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
443-
__m128d __V2,
444-
__m128d __M) {
442+
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
443+
_mm_blendv_pd(__m128d __V1, __m128d __V2, __m128d __M) {
445444
return (__m128d)__builtin_ia32_blendvpd((__v2df)__V1, (__v2df)__V2,
446445
(__v2df)__M);
447446
}
@@ -466,9 +465,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_blendv_pd(__m128d __V1,
466465
/// position in the result. When a mask bit is 1, the corresponding 32-bit
467466
/// element in operand \a __V2 is copied to the same position in the result.
468467
/// \returns A 128-bit vector of [4 x float] containing the copied values.
469-
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_blendv_ps(__m128 __V1,
470-
__m128 __V2,
471-
__m128 __M) {
468+
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
469+
_mm_blendv_ps(__m128 __V1, __m128 __V2, __m128 __M) {
472470
return (__m128)__builtin_ia32_blendvps((__v4sf)__V1, (__v4sf)__V2,
473471
(__v4sf)__M);
474472
}

clang/test/CodeGen/X86/avx-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,14 @@ __m256d test_mm256_blendv_pd(__m256d V1, __m256d V2, __m256d V3) {
9999
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
100100
return _mm256_blendv_pd(V1, V2, V3);
101101
}
102+
TEST_CONSTEXPR(match_m256d(_mm256_blendv_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0},(__m256d)(__v4df){-100.0, -101.0, -102.0, -103.0},(__m256d)(__v4df){0.0, -1.0, 1.0, -1.0}), 1.0f, -101.0, 3.0, -103.0));
102103

103104
__m256 test_mm256_blendv_ps(__m256 V1, __m256 V2, __m256 V3) {
104105
// CHECK-LABEL: test_mm256_blendv_ps
105106
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
106107
return _mm256_blendv_ps(V1, V2, V3);
107108
}
109+
TEST_CONSTEXPR(match_m256(_mm256_blendv_ps((__m256)(__v8sf){0.0f,1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f},(__m256)(__v8sf){-100.0f, -101.0f, -102.0f, -103.0f, -104.0f, -105.0f, -106.0f, -107.0f},(__m256)(__v8sf){-1.0f, 2.0f, -3.0f, 4.0f, -5.0f, -6.0f, 7.0f, -0.0f}), -100.0f, 1.0f, -102.0f, 3.0f, -104.0f, -105.0f, 6.0f, -107.0f));
108110

109111
__m256d test_mm256_broadcast_pd(__m128d* A) {
110112
// CHECK-LABEL: test_mm256_broadcast_pd

clang/test/CodeGen/X86/sse41-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,14 @@ __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
5252
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
5353
return _mm_blendv_pd(V1, V2, V3);
5454
}
55+
TEST_CONSTEXPR(match_m128d(_mm_blendv_pd((__m128d)(__v2df){2.0, -4.0},(__m128d)(__v2df){-111.0, +222.0},(__m128d)(__v2df){2.0, -2.0}), 2.0, 222.0));
5556

5657
__m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
5758
// CHECK-LABEL: test_mm_blendv_ps
5859
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse41.blendvps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
5960
return _mm_blendv_ps(V1, V2, V3);
6061
}
62+
TEST_CONSTEXPR(match_m128(_mm_blendv_ps((__m128)(__v4sf){0.0f, 1.0f, 2.0f, 3.0f},(__m128)(__v4sf){-100.0f, -101.0f, -102.0f, -103.0f},(__m128)(__v4sf){-1.0f, 2.0f, -3.0f, 0.0f}), -100.0f, 1.0f, -102.0f, 3.0f));
6163

6264
__m128d test_mm_ceil_pd(__m128d x) {
6365
// CHECK-LABEL: test_mm_ceil_pd

0 commit comments

Comments
 (0)