Skip to content

Commit fe5f499

Browse files
authored
[AMDGPU][GlobalISel] Lower G_FMINIMUM and G_FMAXIMUM (llvm#151122)
Add GlobalISel lowering of G_FMINIMUM and G_FMAXIMUM following the same logic as in SDag's expandFMINIMUM_FMAXIMUM. Update AMDGPU legalization rules: pre-GFX12 targets now use the new lowering method, and G_FMINNUM_IEEE and G_FMAXNUM_IEEE are made legal to match SDag.
1 parent 26db214 commit fe5f499

File tree

7 files changed

+2235
-285
lines changed

7 files changed

+2235
-285
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@ class LegalizerHelper {
497497
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI);
498498
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI);
499499
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
500+
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI);
500501
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI);
501502
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
502503
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
47484748
case G_FMINIMUMNUM:
47494749
case G_FMAXIMUMNUM:
47504750
return lowerFMinNumMaxNum(MI);
4751+
case G_FMINIMUM:
4752+
case G_FMAXIMUM:
4753+
return lowerFMinimumMaximum(MI);
47514754
case G_MERGE_VALUES:
47524755
return lowerMergeValues(MI);
47534756
case G_UNMERGE_VALUES:
@@ -8777,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
87778780
return Legalized;
87788781
}
87798782

8783+
/// Lower G_FMINIMUM / G_FMAXIMUM into a base min/max followed, where needed,
/// by explicit NaN propagation and a signed-zero fix-up.
///
/// The emitted sequence is:
///  1. A base min/max: G_FMINNUM_IEEE/G_FMAXNUM_IEEE if that opcode is legal
///     or custom for the result type, otherwise G_FMINNUM/G_FMAXNUM if legal
///     or custom, otherwise an ordered G_FCMP feeding a G_SELECT.
///  2. Unless \p MI carries the nnan flag or both sources are known never to
///     be NaN, a select that yields NaN whenever the sources are unordered.
///  3. Unless step 1 used the IEEE opcode, \p MI carries the nsz flag, or at
///     least one source is known never to be zero, a chain of selects that
///     picks the correctly signed zero when the result compares equal to 0.0.
///
/// \param MI The G_FMINIMUM or G_FMAXIMUM instruction; it is erased after its
///           result has been copied into the original destination register.
/// \returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  // Comparison results use one bit per element, keeping the vector shape.
  LLT CmpTy = Ty.changeElementSize(1);

  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
  unsigned OpcIeee =
      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
  unsigned OpcNonIeee =
      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
  // Set when the base operation is taken to order -0.0 below +0.0 already, in
  // which case the signed-zero fix-up at the end is skipped.
  bool MinMaxMustRespectOrderedZero = false;
  Register Res;

  // IEEE variants don't need canonicalization
  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
    MinMaxMustRespectOrderedZero = true;
  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
  } else {
    // No usable min/max opcode: fall back to an ordered compare and select.
    auto Compare = MIRBuilder.buildFCmp(
        IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
  }

  // Propagate a NaN from either operand. The base operation above does not do
  // this, so the select is required whenever *either* source may be NaN; it
  // can only be skipped when both are known never to be NaN (or nnan is set).
  if (!MI.getFlag(MachineInstr::FmNoNans) &&
      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);

    // Build the NaN constant at element type and splat it for vectors.
    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
    if (Ty.isVector())
      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);

    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
    GISelValueTracking VT(MIRBuilder.getMF());
    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);

    // The fix-up is only emitted when both sources may be zero.
    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
      const unsigned Flags = MI.getFlags();
      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);

      // For a maximum the preferred zero is +0.0; for a minimum it is -0.0.
      unsigned TestClass = IsMax ? fcPosZero : fcNegZero;

      // Prefer whichever source is the preferred zero; Src1 is tested last and
      // therefore wins over Src0 when both match.
      auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
      auto LHSSelect =
          MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);

      auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
      auto RHSSelect =
          MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);

      // Only apply the zero selection when the current result equals zero.
      Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
    }
  }

  MIRBuilder.buildCopy(Dst, Res);
  MI.eraseFromParent();
  return Legalized;
}
8853+
87808854
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
87818855
// Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
87828856
Register DstReg = MI.getOperand(0).getReg();

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
976976
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
977977
}
978978

979+
auto &MinNumMaxNumIeee =
980+
getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
981+
982+
if (ST.hasVOP3PInsts()) {
983+
MinNumMaxNumIeee.legalFor(FPTypesPK16)
984+
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
985+
.clampMaxNumElements(0, S16, 2)
986+
.clampScalar(0, S16, S64)
987+
.scalarize(0);
988+
} else if (ST.has16BitInsts()) {
989+
MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0);
990+
} else {
991+
MinNumMaxNumIeee.legalFor(FPTypesBase)
992+
.clampScalar(0, S32, S64)
993+
.scalarize(0);
994+
}
995+
979996
auto &MinNumMaxNum = getActionDefinitionsBuilder(
980-
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
981-
G_FMAXNUM_IEEE});
997+
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM});
982998

983999
if (ST.hasVOP3PInsts()) {
9841000
MinNumMaxNum.customFor(FPTypesPK16)
@@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
21362152
.legalFor(FPTypesPK16)
21372153
.clampMaxNumElements(0, S16, 2)
21382154
.scalarize(0);
2155+
} else if (ST.hasVOP3PInsts()) {
2156+
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
2157+
.lowerFor({V2S16})
2158+
.clampMaxNumElementsStrict(0, S16, 2)
2159+
.scalarize(0)
2160+
.lower();
21392161
} else {
2140-
// TODO: Implement
2141-
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
2162+
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
2163+
.scalarize(0)
2164+
.clampScalar(0, S32, S64)
2165+
.lower();
21422166
}
21432167

21442168
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
@@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21952219
case TargetOpcode::G_FMAXNUM:
21962220
case TargetOpcode::G_FMINIMUMNUM:
21972221
case TargetOpcode::G_FMAXIMUMNUM:
2198-
case TargetOpcode::G_FMINNUM_IEEE:
2199-
case TargetOpcode::G_FMAXNUM_IEEE:
22002222
return legalizeMinNumMaxNum(Helper, MI);
22012223
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
22022224
return legalizeExtractVectorElt(MI, MRI, B);
@@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
28172839
MachineFunction &MF = Helper.MIRBuilder.getMF();
28182840
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
28192841

2820-
const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
2821-
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
2822-
2823-
// With ieee_mode disabled, the instructions have the correct behavior
2824-
// already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
2825-
//
2826-
// FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
2827-
// enabled.
2828-
if (!MFI->getMode().IEEE) {
2829-
if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
2830-
MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
2831-
return true;
2832-
2833-
return !IsIEEEOp;
2834-
}
2835-
2836-
if (IsIEEEOp)
2842+
// With ieee_mode disabled, the instructions have the correct behavior.
2843+
if (!MFI->getMode().IEEE)
28372844
return true;
28382845

28392846
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;

0 commit comments

Comments
 (0)