Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ class LegalizerHelper {
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI);
Expand Down
74 changes: 74 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4748,6 +4748,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_FMINIMUMNUM:
case G_FMAXIMUMNUM:
return lowerFMinNumMaxNum(MI);
case G_FMINIMUM:
case G_FMAXIMUM:
return lowerFMinimumMaximum(MI);
case G_MERGE_VALUES:
return lowerMergeValues(MI);
case G_UNMERGE_VALUES:
Expand Down Expand Up @@ -8777,6 +8780,77 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
return Legalized;
}

/// Lower G_FMINIMUM / G_FMAXIMUM into simpler generic operations.
///
/// The expansion is built in up to three stages:
///  1. A base min/max that is not required to propagate NaN: the
///     G_FMINNUM_IEEE / G_FMAXNUM_IEEE form if it is legal or custom for the
///     result type, otherwise G_FMINNUM / G_FMAXNUM if that is legal or
///     custom, otherwise an ordered compare feeding a select.
///  2. NaN propagation: unless the instruction carries the no-NaNs flag or
///     both inputs are known never to be NaN, the result is replaced by NaN
///     whenever the operands compare unordered.
///  3. Signed-zero fix-up: unless the IEEE form was used or the instruction
///     carries the nsz flag, a result that compares equal to zero is replaced
///     by whichever input is the preferred zero (+0.0 for maximum, -0.0 for
///     minimum), if any.
///
/// \param MI the G_FMINIMUM or G_FMAXIMUM instruction; it is erased.
/// \returns Legalized. The final value is copied into MI's original
/// destination register.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFMinimumMaximum(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  // Boolean (s1 element) type with the same shape as Ty for compare results.
  LLT CmpTy = Ty.changeElementSize(1);

  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
  unsigned OpcIeee =
      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
  unsigned OpcNonIeee =
      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
  // Set when the chosen base operation makes the zero fix-up below
  // unnecessary.
  bool MinMaxMustRespectOrderedZero = false;
  Register Res;

  // IEEE variants don't need canonicalization
  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
    MinMaxMustRespectOrderedZero = true;
  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
    Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
  } else {
    // No usable min/max opcode: fall back to an ordered compare and select.
    auto Compare = MIRBuilder.buildFCmp(
        IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
  }

  // Propagate any NaN of both operands. The fix-up is needed whenever
  // *either* input may be NaN, so it may only be skipped when both inputs are
  // known never to be NaN (or the instruction has the no-NaNs flag).
  if (!MI.getFlag(MachineInstr::FmNoNans) &&
      (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
    auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);

    // Materialize a NaN of the element type, splatted for vector results.
    LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
    APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
    Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
    if (Ty.isVector())
      NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);

    // Keep the base result only when neither operand is NaN.
    Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
  }

  // fminimum/fmaximum requires -0.0 less than +0.0
  if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
    GISelValueTracking VT(MIRBuilder.getMF());
    KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
    KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);

    // The fix-up is only emitted when both inputs may be zero.
    if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
      // NOTE(review): a reviewer on the originating change suspected a bug in
      // this sequence (reportedly also present in the SelectionDAG
      // expansion); the status of that concern was not confirmed.
      const unsigned Flags = MI.getFlags();
      Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
      auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);

      // The zero that must win: +0.0 for maximum, -0.0 for minimum.
      unsigned TestClass = IsMax ? fcPosZero : fcNegZero;

      auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
      auto LHSSelect =
          MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);

      auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
      auto RHSSelect =
          MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);

      // Only apply the preferred-zero choice when the result compares equal
      // to zero; otherwise keep the result computed so far.
      Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
    }
  }

  MIRBuilder.buildCopy(Dst, Res);
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
// Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
Register DstReg = MI.getOperand(0).getReg();
Expand Down
53 changes: 30 additions & 23 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -976,9 +976,25 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
FPOpActions.clampMaxNumElementsStrict(0, S32, 2);
}

auto &MinNumMaxNumIeee =
getActionDefinitionsBuilder({G_FMINNUM_IEEE, G_FMAXNUM_IEEE});

if (ST.hasVOP3PInsts()) {
MinNumMaxNumIeee.legalFor(FPTypesPK16)
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
.clampMaxNumElements(0, S16, 2)
.clampScalar(0, S16, S64)
.scalarize(0);
} else if (ST.has16BitInsts()) {
MinNumMaxNumIeee.legalFor(FPTypes16).clampScalar(0, S16, S64).scalarize(0);
} else {
MinNumMaxNumIeee.legalFor(FPTypesBase)
.clampScalar(0, S32, S64)
.scalarize(0);
}

auto &MinNumMaxNum = getActionDefinitionsBuilder(
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM, G_FMINNUM_IEEE,
G_FMAXNUM_IEEE});
{G_FMINNUM, G_FMAXNUM, G_FMINIMUMNUM, G_FMAXIMUMNUM});

if (ST.hasVOP3PInsts()) {
MinNumMaxNum.customFor(FPTypesPK16)
Expand Down Expand Up @@ -2136,9 +2152,17 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
.scalarize(0);
} else if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
.lowerFor({V2S16})
.clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
} else {
// TODO: Implement
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM})
.scalarize(0)
.clampScalar(0, S32, S64)
.lower();
}

getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
Expand Down Expand Up @@ -2195,8 +2219,6 @@ bool AMDGPULegalizerInfo::legalizeCustom(
case TargetOpcode::G_FMAXNUM:
case TargetOpcode::G_FMINIMUMNUM:
case TargetOpcode::G_FMAXIMUMNUM:
case TargetOpcode::G_FMINNUM_IEEE:
case TargetOpcode::G_FMAXNUM_IEEE:
return legalizeMinNumMaxNum(Helper, MI);
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
return legalizeExtractVectorElt(MI, MRI, B);
Expand Down Expand Up @@ -2817,23 +2839,8 @@ bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(LegalizerHelper &Helper,
MachineFunction &MF = Helper.MIRBuilder.getMF();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

const bool IsIEEEOp = MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE ||
MI.getOpcode() == AMDGPU::G_FMAXNUM_IEEE;

// With ieee_mode disabled, the instructions have the correct behavior
// already for G_FMINIMUMNUM/G_FMAXIMUMNUM.
//
// FIXME: G_FMINNUM/G_FMAXNUM should match the behavior with ieee_mode
// enabled.
if (!MFI->getMode().IEEE) {
if (MI.getOpcode() == AMDGPU::G_FMINIMUMNUM ||
MI.getOpcode() == AMDGPU::G_FMAXIMUMNUM)
return true;

return !IsIEEEOp;
}

if (IsIEEEOp)
// With ieee_mode disabled, the instructions have the correct behavior.
if (!MFI->getMode().IEEE)
return true;

return Helper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
Expand Down
Loading