Skip to content

Commit 22d354a

Browse files
[X86][Clang] Support constexpr evaluation of cvtpd2ps intrinsics (#169980)
This patch implements constant evaluation support for the following X86 intrinsics: - _mm_cvtpd_ps, _mm256_cvtpd_ps (Packed Double to Float) - _mm_cvtsd_ss (Scalar Double to Float merge) - Masked variants of the above It implements the strict "Exact and Finite" rule: conversions that are inexact, infinite, or NaN are rejected in constexpr contexts. Fixes #169370
1 parent d68f543 commit 22d354a

15 files changed

+831
-52
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -166,14 +166,20 @@ let Features = "sse2", Attributes = [NoThrow] in {
166166
def movnti : X86Builtin<"void(int *, int)">;
167167
}
168168

169+
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
170+
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
171+
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
172+
}
173+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
174+
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
175+
}
176+
169177
let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
170178
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
171179
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
172-
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
173180
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
174181
def cvtsd2si : X86Builtin<"int(_Vector<2, double>)">;
175182
def cvttsd2si : X86Builtin<"int(_Vector<2, double>)">;
176-
def cvtsd2ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>)">;
177183
def cvtps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
178184
def cvttps2dq : X86Builtin<"_Vector<4, int>(_Vector<4, float>)">;
179185
}
@@ -462,19 +468,21 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
462468
def vpermilvarps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;
463469
}
464470

471+
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
472+
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
473+
}
474+
465475
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
466476
def dpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
467477
def cmppd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant char)">;
468478
def cmpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant char)">;
469-
def cvtpd2ps256 : X86Builtin<"_Vector<4, float>(_Vector<4, double>)">;
470479
def cvtps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
471480
def cvttpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
472481
def cvtpd2dq256 : X86Builtin<"_Vector<4, int>(_Vector<4, double>)">;
473482
def cvttps2dq256 : X86Builtin<"_Vector<8, int>(_Vector<8, float>)">;
474483
def vperm2f128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Constant int)">;
475484
def vperm2f128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">;
476485
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
477-
478486
foreach Op = ["max", "min"] in {
479487
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
480488
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
@@ -1004,6 +1012,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128
10041012
def cmppd128_mask : X86Builtin<"unsigned char(_Vector<2, double>, _Vector<2, double>, _Constant int, unsigned char)">;
10051013
}
10061014

1015+
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
1016+
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
1017+
}
1018+
10071019
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
10081020
def rndscaleps_mask : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int, _Vector<16, float>, unsigned short, _Constant int)">;
10091021
def rndscalepd_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int, _Vector<8, double>, unsigned char, _Constant int)">;
@@ -1017,7 +1029,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
10171029
def maxpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, _Constant int)">;
10181030
def cvtdq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
10191031
def cvtudq2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, int>, _Vector<16, float>, unsigned short, _Constant int)">;
1020-
def cvtpd2ps512_mask : X86Builtin<"_Vector<8, float>(_Vector<8, double>, _Vector<8, float>, unsigned char, _Constant int)">;
10211032
def vcvtps2ph512_mask : X86Builtin<"_Vector<16, short>(_Vector<16, float>, _Constant int, _Vector<16, short>, unsigned short)">;
10221033
def vcvtph2ps512_mask : X86Builtin<"_Vector<16, float>(_Vector<16, short>, _Vector<16, float>, unsigned short, _Constant int)">;
10231034
}
@@ -1452,9 +1463,12 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
14521463
def compressstoresi256_mask : X86Builtin<"void(_Vector<8, int *>, _Vector<8, int>, unsigned char)">;
14531464
}
14541465

1466+
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
1467+
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
1468+
}
1469+
14551470
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
14561471
def cvtpd2dq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
1457-
def cvtpd2ps_mask : X86Builtin<"_Vector<4, float>(_Vector<2, double>, _Vector<4, float>, unsigned char)">;
14581472
def cvtpd2udq128_mask : X86Builtin<"_Vector<4, int>(_Vector<2, double>, _Vector<4, int>, unsigned char)">;
14591473
}
14601474

@@ -3288,7 +3302,6 @@ let Features = "avx512bw,avx512vl",
32883302
}
32893303

32903304
let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
3291-
def cvtsd2ss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<2, double>, _Vector<4, float>, unsigned char, _Constant int)">;
32923305
def cvtsi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, int, _Constant int)">;
32933306
def cvtss2sd_round_mask : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<4, float>, _Vector<2, double>, unsigned char, _Constant int)">;
32943307
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,38 @@ static llvm::APSInt convertBoolVectorToInt(const Pointer &Val) {
172172
return Result;
173173
}
174174

175+
// Strict double -> float conversion used for X86 PD2PS/cvtsd2ss intrinsics.
176+
// Reject NaN/Inf/Subnormal inputs and any lossy/inexact conversions.
177+
static bool convertDoubleToFloatStrict(APFloat Src, Floating &Dst,
178+
InterpState &S, const Expr *DiagExpr) {
179+
if (Src.isInfinity()) {
180+
if (S.diagnosing())
181+
S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 0;
182+
return false;
183+
}
184+
if (Src.isNaN()) {
185+
if (S.diagnosing())
186+
S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic) << 1;
187+
return false;
188+
}
189+
APFloat Val = Src;
190+
bool LosesInfo = false;
191+
APFloat::opStatus Status = Val.convert(
192+
APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
193+
if (LosesInfo || Val.isDenormal()) {
194+
if (S.diagnosing())
195+
S.CCEDiag(DiagExpr, diag::note_constexpr_float_arithmetic_strict);
196+
return false;
197+
}
198+
if (Status != APFloat::opOK) {
199+
if (S.diagnosing())
200+
S.CCEDiag(DiagExpr, diag::note_invalid_subexpr_in_const_expr);
201+
return false;
202+
}
203+
Dst.copy(Val);
204+
return true;
205+
}
206+
175207
static bool interp__builtin_is_constant_evaluated(InterpState &S, CodePtr OpPC,
176208
const InterpFrame *Frame,
177209
const CallExpr *Call) {
@@ -3363,6 +3395,122 @@ static bool interp__builtin_ia32_cvt_vec2mask(InterpState &S, CodePtr OpPC,
33633395
pushInteger(S, RetMask, Call->getType());
33643396
return true;
33653397
}
3398+
static bool interp__builtin_ia32_cvtsd2ss(InterpState &S, CodePtr OpPC,
3399+
const CallExpr *Call,
3400+
bool HasRoundingMask) {
3401+
APSInt Rounding, MaskInt;
3402+
Pointer Src, B, A;
3403+
3404+
if (HasRoundingMask) {
3405+
assert(Call->getNumArgs() == 5);
3406+
Rounding = popToAPSInt(S, Call->getArg(4));
3407+
MaskInt = popToAPSInt(S, Call->getArg(3));
3408+
Src = S.Stk.pop<Pointer>();
3409+
B = S.Stk.pop<Pointer>();
3410+
A = S.Stk.pop<Pointer>();
3411+
if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B) ||
3412+
!CheckLoad(S, OpPC, Src))
3413+
return false;
3414+
} else {
3415+
assert(Call->getNumArgs() == 2);
3416+
B = S.Stk.pop<Pointer>();
3417+
A = S.Stk.pop<Pointer>();
3418+
if (!CheckLoad(S, OpPC, A) || !CheckLoad(S, OpPC, B))
3419+
return false;
3420+
}
3421+
3422+
const auto *DstVTy = Call->getType()->castAs<VectorType>();
3423+
unsigned NumElems = DstVTy->getNumElements();
3424+
const Pointer &Dst = S.Stk.peek<Pointer>();
3425+
3426+
// Copy all elements except lane 0 (overwritten below) from A to Dst.
3427+
for (unsigned I = 1; I != NumElems; ++I)
3428+
Dst.elem<Floating>(I) = A.elem<Floating>(I);
3429+
3430+
// Convert element 0 from double to float, or use Src if masked off.
3431+
if (!HasRoundingMask || (MaskInt.getZExtValue() & 0x1)) {
3432+
assert(S.getASTContext().FloatTy == DstVTy->getElementType() &&
3433+
"cvtsd2ss requires float element type in destination vector");
3434+
3435+
Floating Conv = S.allocFloat(
3436+
S.getASTContext().getFloatTypeSemantics(DstVTy->getElementType()));
3437+
APFloat SrcVal = B.elem<Floating>(0).getAPFloat();
3438+
if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
3439+
return false;
3440+
Dst.elem<Floating>(0) = Conv;
3441+
} else {
3442+
Dst.elem<Floating>(0) = Src.elem<Floating>(0);
3443+
}
3444+
3445+
Dst.initializeAllElements();
3446+
return true;
3447+
}
3448+
3449+
static bool interp__builtin_ia32_cvtpd2ps(InterpState &S, CodePtr OpPC,
3450+
const CallExpr *Call, bool IsMasked,
3451+
bool HasRounding) {
3452+
3453+
APSInt MaskVal;
3454+
Pointer PassThrough;
3455+
Pointer Src;
3456+
APSInt Rounding;
3457+
3458+
if (IsMasked) {
3459+
// Pop in reverse order.
3460+
if (HasRounding) {
3461+
Rounding = popToAPSInt(S, Call->getArg(3));
3462+
MaskVal = popToAPSInt(S, Call->getArg(2));
3463+
PassThrough = S.Stk.pop<Pointer>();
3464+
Src = S.Stk.pop<Pointer>();
3465+
} else {
3466+
MaskVal = popToAPSInt(S, Call->getArg(2));
3467+
PassThrough = S.Stk.pop<Pointer>();
3468+
Src = S.Stk.pop<Pointer>();
3469+
}
3470+
3471+
if (!CheckLoad(S, OpPC, PassThrough))
3472+
return false;
3473+
} else {
3474+
// Pop source only.
3475+
Src = S.Stk.pop<Pointer>();
3476+
}
3477+
3478+
if (!CheckLoad(S, OpPC, Src))
3479+
return false;
3480+
3481+
const auto *RetVTy = Call->getType()->castAs<VectorType>();
3482+
unsigned RetElems = RetVTy->getNumElements();
3483+
unsigned SrcElems = Src.getNumElems();
3484+
const Pointer &Dst = S.Stk.peek<Pointer>();
3485+
3486+
// Initialize destination with passthrough or zeros.
3487+
for (unsigned I = 0; I != RetElems; ++I)
3488+
if (IsMasked)
3489+
Dst.elem<Floating>(I) = PassThrough.elem<Floating>(I);
3490+
else
3491+
Dst.elem<Floating>(I) = Floating(APFloat(0.0f));
3492+
3493+
assert(S.getASTContext().FloatTy == RetVTy->getElementType() &&
3494+
"cvtpd2ps requires float element type in return vector");
3495+
3496+
// Convert double to float for enabled elements (only process source elements
3497+
// that exist).
3498+
for (unsigned I = 0; I != SrcElems; ++I) {
3499+
if (IsMasked && !MaskVal[I])
3500+
continue;
3501+
3502+
APFloat SrcVal = Src.elem<Floating>(I).getAPFloat();
3503+
3504+
Floating Conv = S.allocFloat(
3505+
S.getASTContext().getFloatTypeSemantics(RetVTy->getElementType()));
3506+
if (!convertDoubleToFloatStrict(SrcVal, Conv, S, Call))
3507+
return false;
3508+
Dst.elem<Floating>(I) = Conv;
3509+
}
3510+
3511+
Dst.initializeAllElements();
3512+
return true;
3513+
}
33663514

33673515
static bool interp__builtin_ia32_shuffle_generic(
33683516
InterpState &S, CodePtr OpPC, const CallExpr *Call,
@@ -5180,6 +5328,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
51805328
case X86::BI__builtin_ia32_cvtq2mask512:
51815329
return interp__builtin_ia32_cvt_vec2mask(S, OpPC, Call, BuiltinID);
51825330

5331+
case X86::BI__builtin_ia32_cvtsd2ss:
5332+
return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, false);
5333+
5334+
case X86::BI__builtin_ia32_cvtsd2ss_round_mask:
5335+
return interp__builtin_ia32_cvtsd2ss(S, OpPC, Call, true);
5336+
5337+
case X86::BI__builtin_ia32_cvtpd2ps:
5338+
case X86::BI__builtin_ia32_cvtpd2ps256:
5339+
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, false, false);
5340+
case X86::BI__builtin_ia32_cvtpd2ps_mask:
5341+
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, false);
5342+
case X86::BI__builtin_ia32_cvtpd2ps512_mask:
5343+
return interp__builtin_ia32_cvtpd2ps(S, OpPC, Call, true, true);
5344+
51835345
case X86::BI__builtin_ia32_cmpb128_mask:
51845346
case X86::BI__builtin_ia32_cmpw128_mask:
51855347
case X86::BI__builtin_ia32_cmpd128_mask:

0 commit comments

Comments
 (0)