Skip to content

Commit 816eb9e

Browse files
authored
[Clang][bytecode] Add interp__builtin_elementwise_triop_fp to handle general 3-operand floating point intrinsics (#157106)
Refactor interp__builtin_elementwise_fma into a general helper, interp__builtin_elementwise_triop_fp, modelled on interp__builtin_elementwise_triop: it takes a callback function argument that computes the result, so the same code can be reused by other 3-operand floating-point intrinsics, including some upcoming x86 intrinsics.
1 parent c8d065b commit 816eb9e

File tree

1 file changed

+16
-7
lines changed

1 file changed

+16
-7
lines changed

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2736,8 +2736,11 @@ static bool interp__builtin_ia32_pmul(InterpState &S, CodePtr OpPC,
27362736
return true;
27372737
}
27382738

2739-
static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
2740-
const CallExpr *Call) {
2739+
static bool interp__builtin_elementwise_triop_fp(
2740+
InterpState &S, CodePtr OpPC, const CallExpr *Call,
2741+
llvm::function_ref<APFloat(const APFloat &, const APFloat &,
2742+
const APFloat &, llvm::RoundingMode)>
2743+
Fn) {
27412744
assert(Call->getNumArgs() == 3);
27422745

27432746
FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
@@ -2756,8 +2759,7 @@ static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
27562759
const Floating &Z = S.Stk.pop<Floating>();
27572760
const Floating &Y = S.Stk.pop<Floating>();
27582761
const Floating &X = S.Stk.pop<Floating>();
2759-
APFloat F = X.getAPFloat();
2760-
F.fusedMultiplyAdd(Y.getAPFloat(), Z.getAPFloat(), RM);
2762+
APFloat F = Fn(X.getAPFloat(), Y.getAPFloat(), Z.getAPFloat(), RM);
27612763
Floating Result = S.allocFloat(X.getSemantics());
27622764
Result.copy(F);
27632765
S.Stk.push<Floating>(Result);
@@ -2788,8 +2790,8 @@ static bool interp__builtin_elementwise_fma(InterpState &S, CodePtr OpPC,
27882790
APFloat X = VX.elem<T>(I).getAPFloat();
27892791
APFloat Y = VY.elem<T>(I).getAPFloat();
27902792
APFloat Z = VZ.elem<T>(I).getAPFloat();
2791-
(void)X.fusedMultiplyAdd(Y, Z, RM);
2792-
Dst.elem<Floating>(I) = Floating(X);
2793+
APFloat F = Fn(X, Y, Z, RM);
2794+
Dst.elem<Floating>(I) = Floating(F);
27932795
}
27942796
Dst.initializeAllElements();
27952797
return true;
@@ -3410,7 +3412,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
34103412
return interp__builtin_ia32_pmul(S, OpPC, Call, BuiltinID);
34113413

34123414
case Builtin::BI__builtin_elementwise_fma:
3413-
return interp__builtin_elementwise_fma(S, OpPC, Call);
3415+
return interp__builtin_elementwise_triop_fp(
3416+
S, OpPC, Call,
3417+
[](const APFloat &X, const APFloat &Y, const APFloat &Z,
3418+
llvm::RoundingMode RM) {
3419+
APFloat F = X;
3420+
F.fusedMultiplyAdd(Y, Z, RM);
3421+
return F;
3422+
});
34143423

34153424
case clang::X86::BI__builtin_ia32_pblendvb128:
34163425
case clang::X86::BI__builtin_ia32_pblendvb256:

0 commit comments

Comments
 (0)