Skip to content

Commit 61fbbb4

Browse files
committed
[AMDGPU][GlobalISel] Lower G_FMINIMUM and G_FMAXIMUM
Add GlobalISel lowering of G_FMINIMUM and G_FMAXIMUM, following the same logic as SDag's expandFMINIMUM_FMAXIMUM. Update the AMDGPU legalization rules: pre-GFX12 targets now use the new lowering method, and G_FMINNUM_IEEE and G_FMAXNUM_IEEE are made legal to match SDag.
1 parent 50f3a6b commit 61fbbb4

File tree

7 files changed

+2235
-285
lines changed

7 files changed

+2235
-285
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ class LegalizerHelper {
456456
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI);
457457
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI);
458458
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
459+
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI);
459460
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI);
460461
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
461462
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4674,6 +4674,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
46744674
case G_FMINIMUMNUM:
46754675
case G_FMAXIMUMNUM:
46764676
return lowerFMinNumMaxNum(MI);
4677+
case G_FMINIMUM:
4678+
case G_FMAXIMUM:
4679+
return lowerFMinimumMaximum(MI);
46774680
case G_MERGE_VALUES:
46784681
return lowerMergeValues(MI);
46794682
case G_UNMERGE_VALUES:
@@ -8294,6 +8297,75 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
82948297
return Legalized;
82958298
}
82968299

8300+
// Lower G_FMINIMUM / G_FMAXIMUM into a sequence of simpler operations:
//   1. Compute a base min/max using the first available of
//      G_FMINNUM_IEEE/G_FMAXNUM_IEEE, G_FMINNUM/G_FMAXNUM, or an ordered
//      compare + select.
//   2. Unless NaNs are excluded (nnan flag or both operands known never NaN),
//      select a NaN constant whenever the operands compare unordered.
//   3. Unless signed zeros are excluded (nsz flag, the IEEE path was taken, or
//      either operand is known never zero), fix up the result so that -0.0 is
//      treated as less than +0.0.
// The result is copied into the original destination register and \p MI is
// erased. Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  // Boolean type matching the shape (scalar or vector) of the operation.
  LLT CmpTy =
      Ty.isScalar() ? LLT::scalar(1) : LLT::vector(Ty.getElementCount(), 1);

  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
  unsigned OpcIeee =
      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
  unsigned OpcNonIeee =
      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
  bool MinMaxMustRespectOrderedZero = false;
  Register Res;

  // IEEE variants don't need canonicalization. When this path is taken the
  // signed-zero fixup below is skipped.
  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
    MinMaxMustRespectOrderedZero = true;
  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
  } else {
    // No usable min/max opcode: fall back to an ordered compare and select.
    auto Compare = MIRBuilder.buildFCmp(
        IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
  }

  // Propagate a NaN from either operand. The base result computed above is
  // not guaranteed to be NaN when only one input is NaN, so the fixup is
  // required whenever *either* operand may be NaN, and can be dropped only
  // when both are known never to be NaN.
  if (!MI.getFlag(MachineInstr::FmNoNans) &&
      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);

    // Build the NaN constant at element type and splat it for vectors.
    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
    if (Ty.isVector())
      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);

    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
    GISelValueTracking VT(MIRBuilder.getMF());
    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);

    // The fixup only matters if both inputs may be zero.
    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);

      // For max prefer an operand that is +0.0; for min prefer one that is
      // -0.0.
      unsigned TestClass = IsMax ? fcPosZero : fcNegZero;

      auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
      auto LHSSelect = MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res);

      auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
      auto RHSSelect = MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect);

      // Only override the base result when it compared equal to zero.
      Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res).getReg(0);
    }
  }

  MIRBuilder.buildCopy(Dst, Res);
  MI.eraseFromParent();
  return Legalized;
}
8368+
82978369
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
82988370
// Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
82998371
Register DstReg = MI.getOperand(0).getReg();

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -957,9 +957,27 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
957957
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
958958
}
959959

960+
auto &MinNumMaxNumIeee = getActionDefinitionsBuilder(
961+
{G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
962+
963+
if (ST.hasVOP3PInsts()) {
964+
MinNumMaxNumIeee.legalFor(FPTypesPK16)
965+
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
966+
.clampMaxNumElements(0, S16, 2)
967+
.clampScalar(0, S16, S64)
968+
.scalarize(0);
969+
} else if (ST.has16BitInsts()) {
970+
MinNumMaxNumIeee.legalFor(FPTypes16)
971+
.clampScalar(0, S16, S64)
972+
.scalarize(0);
973+
} else {
974+
MinNumMaxNumIeee.legalFor(FPTypesBase)
975+
.clampScalar(0, S32, S64)
976+
.scalarize(0);
977+
}
978+
960979
auto &MinNumMaxNum = getActionDefinitionsBuilder(
961-
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
962-
G_FMAXNUM_IEEE});
980+
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM});
963981

964982
if (ST.hasVOP3PInsts()) {
965983
MinNumMaxNum.customFor(FPTypesPK16)
@@ -2100,9 +2118,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
21002118
.legalFor(FPTypesPK16)
21012119
.clampMaxNumElements(0, S16, 2)
21022120
.scalarize(0);
2121+
} else if (ST.hasVOP3PInsts()){
2122+
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
2123+
.lowerFor({V2S16})
2124+
.clampMaxNumElementsStrict(0, S16, 2)
2125+
.scalarize(0)
2126+
.lower();
21032127
} else {
2104-
// TODO: Implement
2105-
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
2128+
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
2129+
.scalarize(0)
2130+
.clampScalar(0, S32, S64)
2131+
.lower();
21062132
}
21072133

21082134
getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
@@ -2159,8 +2185,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
21592185
case TargetOpcode::G_FMAXNUM:
21602186
case TargetOpcode::G_FMINIMUMNUM:
21612187
case TargetOpcode::G_FMAXIMUMNUM:
2162-
case TargetOpcode::G_FMINNUM_IEEE:
2163-
case TargetOpcode::G_FMAXNUM_IEEE:
21642188
return legalizeMinNumMaxNum(Helper, MI);
21652189
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
21662190
return legalizeExtractVectorElt(MI, MRI, B);
@@ -2734,23 +2758,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
27342758
MachineFunction &MF = Helper.MIRBuilder.getMF();
27352759
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
27362760

2737-
const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
2738-
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;
2739-
2740-
// With ieee_mode disabled, the instructions have the correct behavior
2741-
// already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
2742-
//
2743-
// FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
2744-
// enabled.
2745-
if (!MFI->getMode().IEEE) {
2746-
if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
2747-
MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
2748-
return true;
2749-
2750-
return !IsIEEEOp;
2751-
}
2752-
2753-
if (IsIEEEOp)
2761+
// With ieee_mode disabled, the instructions have the correct behavior.
2762+
if (!MFI->getMode().IEEE)
27542763
return true;
27552764

27562765
return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;

0 commit comments

Comments
 (0)