Skip to content

Commit 3f7b408

Browse files
committed
[AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp.
This combine performs the following transformations: fmul x, select(y, A, B) -> ldexp (x, select i32 (y, a, b)); fmul x, select(y, -A, -B) -> ldexp ((fneg x), select i32 (y, a, b)), where A = 2^a and B = 2^b, and a and b are integers. This is a follow-up PR that implements the above combine for GlobalISel, as has already been done for SelectionDAG ISel (PR-111109).
1 parent 814902a commit 3f7b408

File tree

3 files changed

+86
-0
lines changed

3 files changed

+86
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
124124
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
125125
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
126126

127+
// Fold a floating-point multiply by a select between two constants into an
// ldexp whose exponent operand is selected instead:
//   fmul x, select(cond, A, B)   -> ldexp (x, select i32 (cond, a, b))
//   fmul x, select(cond, -A, -B) -> ldexp ((fneg x), select i32 (cond, a, b))
// where A = 2^a and B = 2^b for integers a and b. The constant, sign and
// power-of-two checks are done in matchCombineFmulWithSelectToLdexp.
def combine_fmul_with_select_to_ldexp : GICombineRule<
  (defs root:$root, build_fn_matchinfo:$matchinfo),
  (match (G_FMUL $dst, $x, $select):$root,
         (G_SELECT $select, $cond, $A, $B):$sel,
         [{ return Helper.matchCombineFmulWithSelectToLdexp(
                *${root}, *${sel}, ${matchinfo}); }]),
  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
136+
127137

128138
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
129139
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,3 +445,75 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
445445
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
446446
MI.eraseFromParent();
447447
}
448+
449+
/// Match `G_FMUL x, (G_SELECT cond, A, B)` where A and B are floating-point
/// constants (constant splats for vector types) of the same sign whose
/// magnitudes are exact powers of two, and prepare the rewrite
///   ldexp(x, select(cond, log2|A|, log2|B|))
/// with x negated first when the constants are negative.
///
/// \param MI        The G_FMUL being combined (asserted).
/// \param Sel       The G_SELECT feeding the multiply (asserted).
/// \param MatchInfo On success, set to the build function that emits the
///                  replacement instructions; left untouched otherwise.
/// \returns true if the pattern matched and \p MatchInfo was populated.
bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToLdexp(
    MachineInstr &MI, MachineInstr &Sel,
    std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FMUL);
  assert(Sel.getOpcode() == TargetOpcode::G_SELECT);

  Register Dst = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(Dst);
  LLT ScalarDestTy = DestTy.getScalarType();

  // Only f16/f32/f64 (scalar or vector element) multiplies are handled.
  const bool IsSupportedFPType = ScalarDestTy == LLT::float64() ||
                                 ScalarDestTy == LLT::float32() ||
                                 ScalarDestTy == LLT::float16();
  if (!IsSupportedFPType)
    return false;

  // The select is replaced by an integer select, so the original one must
  // have no other (non-debug) user.
  if (!MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
    return false;

  // Look up a select arm as an FP constant: a (possibly partially undef)
  // splat for vector types, a plain constant otherwise.
  auto GetFPConstant = [&](Register Reg) {
    return DestTy.isVector()
               ? getFConstantSplat(Reg, MRI, /* allowUndef */ true)
               : getFConstantVRegValWithLookThrough(Reg, MRI);
  };

  const auto SelectTrueCst = GetFPConstant(Sel.getOperand(2).getReg());
  if (!SelectTrueCst)
    return false;
  const auto SelectFalseCst = GetFPConstant(Sel.getOperand(3).getReg());
  if (!SelectFalseCst)
    return false;

  const auto &TrueFP = SelectTrueCst->Value;
  const auto &FalseFP = SelectFalseCst->Value;

  // A single optional fneg of x can only absorb the sign if both arms agree.
  if (TrueFP.isNegative() != FalseFP.isNegative())
    return false;

  // For f32, only non-inline constants should be transformed.
  const SIInstrInfo *TII =
      (MI.getMF()->getSubtarget<GCNSubtarget>()).getInstrInfo();
  if (ScalarDestTy == LLT::float32() && TII->isInlineConstant(TrueFP) &&
      TII->isInlineConstant(FalseFP))
    return false;

  // getExactLog2Abs() reports INT_MIN when the magnitude is not an exact
  // power of two; such constants cannot be expressed as an ldexp exponent.
  int SelectTrueVal = TrueFP.getExactLog2Abs();
  if (SelectTrueVal == INT_MIN)
    return false;
  int SelectFalseVal = FalseFP.getExactLog2Abs();
  if (SelectFalseVal == INT_MIN)
    return false;

  Register SelectCond = Sel.getOperand(1).getReg();
  const bool NegateX = TrueFP.isNegative();

  MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
    LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));

    // Build the exponent constants as separate statements: the evaluation
    // order of function arguments is unspecified in C++, so nesting these
    // calls inside buildSelect() would make the emitted instruction order
    // depend on the host compiler.
    auto TrueExp = Builder.buildConstant(IntDestTy, SelectTrueVal);
    auto FalseExp = Builder.buildConstant(IntDestTy, SelectFalseVal);
    auto NewSel = Builder.buildSelect(IntDestTy, SelectCond, TrueExp, FalseExp);

    Register X = MI.getOperand(1).getReg();
    if (NegateX) {
      // Negative constants: fold the sign into x, reusing the flags of the
      // instruction that defines x.
      auto NegX = Builder.buildFNeg(DestTy, X, MRI.getVRegDef(X)->getFlags());
      Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
    } else {
      Builder.buildFLdexp(Dst, X, NewSel, MI.getFlags());
    }
  };

  return true;
}

llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ class AMDGPUCombinerHelper : public CombinerHelper {
3030
Register Src1, Register Src2);
3131
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
3232
Register Src1, Register Src2);
33+
34+
/// Match a G_FMUL (\p MI) fed by a G_SELECT (\p Sel) of two floating-point
/// constants that are same-signed exact powers of two. On success, stores in
/// \p MatchInfo a build function that emits an ldexp of the other multiplicand
/// (negated for negative constants) by an integer select of the exponents.
/// \returns true if the combine applies.
bool matchCombineFmulWithSelectToLdexp(
35+
MachineInstr &MI, MachineInstr &Sel,
36+
std::function<void(MachineIRBuilder &)> &MatchInfo);
3337
};
3438

3539
} // namespace llvm

0 commit comments

Comments
 (0)