Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;

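// Do the following combines, where A and B are positive floating-point
// constants (or constant splats) that are exact powers of two, and
// a = log2(A), b = log2(B):
// fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b))
// fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b))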
def combine_fmul_with_select_to_fldexp : GICombineRule<
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We really should have complex patterns and constant xforms like selection patterns

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you kindly elaborate? I am dealing with pattern matching in ISel for the first time.

(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_FMUL $dst, $x, $select):$root,
(G_SELECT $select, $y, $A, $B):$sel,
[{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;


let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
Expand Down Expand Up @@ -153,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;

def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
[all_combines, clamp_i64_to_i16, foldable_fneg]> {
[all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> {
let CombineAllMethodName = "tryCombineAllImpl";
}

def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
[all_combines, gfx6gfx7_combines, gfx8_combines,
[all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
Expand Down
71 changes: 71 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
using namespace llvm;
using namespace MIPatternMatch;

/// Construct the AMDGPU-specific combiner helper.
///
/// Observer, B, IsPreLegalize, KB, MDT and LI are forwarded unchanged to the
/// CombinerHelper base class. \p STI is stored by reference, and the
/// instruction info it exposes is cached in TII.
AMDGPUCombinerHelper::AMDGPUCombinerHelper(
GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
const GCNSubtarget &STI)
: CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
// Inside the initializer list `STI` names the constructor parameter, so TII
// is obtained from the subtarget that was passed in.
TII(*STI.getInstrInfo()) {}

LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
switch (MI.getOpcode()) {
Expand Down Expand Up @@ -445,3 +452,67 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
MI.eraseFromParent();
}

/// Match `G_FMUL x, (G_SELECT cond, A, B)` where A and B are floating-point
/// constants (or constant splat vectors) whose magnitudes are exact powers of
/// two and whose signs agree, and prepare a rewrite to
///   `G_FLDEXP x,        (G_SELECT cond, log2|A|, log2|B|)`   (A, B >= 0)
///   `G_FLDEXP (fneg x), (G_SELECT cond, log2|A|, log2|B|)`   (A, B <  0)
///
/// \param MI        The G_FMUL root; its operand 2 must be the result of
///                  \p Sel.
/// \param Sel       The G_SELECT feeding the multiply.
/// \param MatchInfo On success, receives a builder callback that emits the
///                  replacement instructions. The callback keeps a reference
///                  to \p MI, so it must run while \p MI is still alive.
/// \returns true if the combine applies and \p MatchInfo was set.
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
  assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  // Only f16/f32/f64 (scalar or vector element) multiplies are handled, and
  // the select must have no other non-debug user than this multiply.
  if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
       ScalarDestTy != LLT::float16()) ||
      !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  Register SelectCondReg = Sel.getOperand(1).getReg();
  MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
  MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());

  // Both select arms must be FP constants (or splats of one).
  const auto SelectTrueVal =
      isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
  if (!SelectTrueVal)
    return false;
  const auto SelectFalseVal =
      isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
  if (!SelectFalseVal)
    return false;

  // A single optional fneg on x can only account for one common sign.
  if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  // NOTE(review): presumably because inline immediates are already free to
  // encode, so the rewrite would not pay off -- confirm.
  if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
      TII.isInlineConstant(*SelectFalseVal))
    return false;

  // INT_MIN is the "not an exact power of two" result of getExactLog2Abs().
  int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
  if (SelectTrueLog2Val == INT_MIN)
    return false;
  int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
  if (SelectFalseLog2Val == INT_MIN)
    return false;

  // Only the common sign is needed at apply time; avoid copying the APFloat
  // into the callback.
  const bool NegateX = SelectTrueVal->isNegative();

  MatchInfo = [this, &MI, Dst, DestTy, SelectCondReg, SelectTrueLog2Val,
               SelectFalseLog2Val, NegateX](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));

    // Build the two exponent constants as separate statements. When they are
    // nested as call arguments their evaluation order is unspecified, which
    // makes the order of the emitted instructions depend on the host compiler.
    auto TrueExp = Builder.buildConstant(IntDestTy, SelectTrueLog2Val);
    auto FalseExp = Builder.buildConstant(IntDestTy, SelectFalseLog2Val);
    auto NewSel =
        Builder.buildSelect(IntDestTy, SelectCondReg, TrueExp, FalseExp);

    Register XReg = MI.getOperand(1).getReg();
    if (NegateX) {
      // The fneg reuses the flags of the instruction defining x.
      auto NegX =
          Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
      XReg = NegX.getReg(0);
    }
    Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
  };

  return true;
}
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,22 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H

#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

namespace llvm {
class AMDGPUCombinerHelper : public CombinerHelper {
protected:
const GCNSubtarget &STI;
const SIInstrInfo &TII;

public:
using CombinerHelper::CombinerHelper;
/// Construct the helper with the generic combiner state plus the GCN
/// subtarget \p STI, which initializes the STI and TII members.
AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
bool IsPreLegalize, GISelKnownBits *KB,
MachineDominatorTree *MDT, const LegalizerInfo *LI,
const GCNSubtarget &STI);

bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
Expand All @@ -30,6 +39,10 @@ class AMDGPUCombinerHelper : public CombinerHelper {
Register Src1, Register Src2);
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);

/// Match a G_FMUL \p MI whose second source operand is the result of the
/// G_SELECT \p Sel, where both select arms are same-signed power-of-two
/// floating-point constants. On success, sets \p MatchInfo to a callback
/// that builds the equivalent G_FLDEXP sequence and returns true.
bool matchCombineFmulWithSelectToFldexp(
MachineInstr &MI, MachineInstr &Sel,
std::function<void(MachineIRBuilder &)> &MatchInfo);
};

} // namespace llvm
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
TII(*STI.getInstrInfo()),
Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
Expand Down
Loading
Loading