Skip to content

Commit 62078af

Browse files
committed
AMDGPU: Make v2f64 -> v2f16 conversion Legal only when unsafe fast math is set
Otherwise, custom-lower v2f64 -> v2f16 by scalarizing the conversion.
1 parent 6d970b0 commit 62078af

File tree

3 files changed

+41
-5
lines changed

3 files changed

+41
-5
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,9 +1054,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10541054

10551055
auto &FPTruncActions = getActionDefinitionsBuilder(G_FPTRUNC);
10561056
if (ST.hasCvtPkF16F32Inst()) {
1057-
FPTruncActions.legalFor({{S32, S64}, {S16, S32}, {V2S16, V2S32}});
1058-
if (TM.Options.UnsafeFPMath)
1059-
FPTruncActions.legalFor({V2S16, V2S64});
1057+
FPTruncActions.legalFor({{S32, S64}, {S16, S32}, {V2S16, V2S32}})
1058+
.customFor({V2S16, V2S64});
10601059
} else
10611060
FPTruncActions.legalFor({{S32, S64}, {S16, S32}});
10621061
FPTruncActions.scalarize(0).lower();
@@ -2156,6 +2155,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21562155
case TargetOpcode::G_FMINNUM_IEEE:
21572156
case TargetOpcode::G_FMAXNUM_IEEE:
21582157
return legalizeMinNumMaxNum(Helper, MI);
2158+
case TargetOpcode::G_FPTRUNC:
2159+
return legalizeFPTrunc(Helper, MI, MRI);
21592160
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
21602161
return legalizeExtractVectorElt(MI, MRI, B);
21612162
case TargetOpcode::G_INSERT_VECTOR_ELT:
@@ -2742,6 +2743,29 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
27422743
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
27432744
}
27442745

2746+
bool AMDGPULegalizerInfo::legalizeFPTrunc(LegalizerHelper &Helper,
2747+
MachineInstr &MI,
2748+
MachineRegisterInfo &MRI) const {
2749+
// TODO: We should only use the fast math flag. But the global option is
2750+
// still used here to be consistent, especially when the fast math flag is
2751+
// not working for FP_ROUND on the SelectionDAG path at this moment.
2752+
MachineFunction &MF = Helper.MIRBuilder.getMF();
2753+
bool AllowInaccurateFPTRUNC = MI.getFlag(MachineInstr::FmAfn) ||
2754+
MF.getTarget().Options.UnsafeFPMath;
2755+
2756+
if (AllowInaccurateFPTRUNC) {
2757+
// Use the tablegen pattern to select native instructions.
2758+
return true;
2759+
}
2760+
2761+
Register DstReg = MI.getOperand(0).getReg();
2762+
LLT DstTy = MRI.getType(DstReg);
2763+
2764+
// Scalarize the vector and fall through to lower f64 -> f16.
2765+
return Helper.fewerElementsVector(MI, 0, DstTy.getElementType()) ==
2766+
LegalizerHelper::Legalized;
2767+
}
2768+
27452769
bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
27462770
MachineInstr &MI, MachineRegisterInfo &MRI,
27472771
MachineIRBuilder &B) const {

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
5656
bool legalizeFPTOI(MachineInstr &MI, MachineRegisterInfo &MRI,
5757
MachineIRBuilder &B, bool Signed) const;
5858
bool legalizeMinNumMaxNum(LegalizerHelper &Helper, MachineInstr &MI) const;
59+
bool legalizeFPTrunc(LegalizerHelper &Helper, MachineInstr &MI,
60+
MachineRegisterInfo &MRI) const;
5961
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
6062
MachineIRBuilder &B) const;
6163
bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6893,8 +6893,18 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
68936893
return Op;
68946894

68956895
EVT DstVT = Op.getValueType();
6896-
if (DstVT == MVT::v2f16)
6897-
return DAG.getTarget().Options.UnsafeFPMath ? Op : SDValue();
6896+
6897+
if (DstVT == MVT::v2f16) {
6898+
// FIXME: We should only use fast math flag here. However, the fast math
6899+
// flag is lost during fptrunc to fp_round lowering. In addition, the flag
6900+
// is not propagated during subsequent lowering.
6901+
bool AllowInaccurateFP_ROUND = Op->getFlags().hasApproximateFuncs() ||
6902+
DAG.getTarget().Options.UnsafeFPMath;
6903+
// With fast math, the tablegen pattern is used to select native
6904+
// instructions. Otherwise, the vector will be scalarized and custom lowered
6905+
// to preserve the precision.
6906+
return AllowInaccurateFP_ROUND ? Op : SDValue();
6907+
}
68986908

68996909
SDLoc DL(Op);
69006910
if (DstVT == MVT::f16) {

0 commit comments

Comments
 (0)