Skip to content

Commit 52113cf

Browse files
authored
AMDGPU: Fix broken exp10 lowering for f16 (#170582)
The f16 exp10 lowering was calling the exp handling, and was therefore multiplying by the wrong constant. GlobalISel is still broken, but is missing the fast exp10 path; this is tracked in #170576.
1 parent 29afd5a commit 52113cf

File tree

2 files changed

+392
-129
lines changed

2 files changed

+392
-129
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3055,8 +3055,11 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
30553055

30563056
if (VT.getScalarType() == MVT::f16) {
30573057
// v_exp_f16 (fmul x, log2e)
3058-
if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
3059-
return lowerFEXPUnsafe(X, SL, DAG, Flags);
3058+
3059+
if (allowApproxFunc(DAG, Flags)) { // TODO: Does this really require fast?
3060+
return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
3061+
: lowerFEXPUnsafe(X, SL, DAG, Flags);
3062+
}
30603063

30613064
if (VT.isVector())
30623065
return SDValue();
@@ -3066,7 +3069,8 @@ SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
30663069

30673070
// Nothing in half is a denormal when promoted to f32.
30683071
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
3069-
SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
3072+
SDValue Lowered = IsExp10 ? lowerFEXP10Unsafe(Ext, SL, DAG, Flags)
3073+
: lowerFEXPUnsafe(Ext, SL, DAG, Flags);
30703074
return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
30713075
DAG.getTargetConstant(0, SL, MVT::i32), Flags);
30723076
}

0 commit comments

Comments
 (0)