Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
const RegBankLegalizeRules &RBLRules = getRules(ST, MRI);

// Logic that does legalization based on IDs assigned to Opcode.
RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules);
RegBankLegalizeHelper RBLHelper(B, MUI, RBI, TPC, RBLRules);

SmallVector<MachineInstr *> AllInst;

Expand All @@ -435,7 +435,8 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
unsigned Opc = MI->getOpcode();
// Insert point for use operands needs some calculation.
if (Opc == AMDGPU::G_PHI) {
RBLHelper.applyMappingPHI(*MI);
if (!RBLHelper.applyMappingPHI(*MI))
return false;
continue;
}

Expand Down Expand Up @@ -466,7 +467,8 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
// S1 rules are in RegBankLegalizeRules.
}

RBLHelper.findRuleAndApplyMapping(*MI);
if (!RBLHelper.findRuleAndApplyMapping(*MI))
return false;
}

// Sgpr S1 clean up combines:
Expand Down
73 changes: 49 additions & 24 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,29 +31,47 @@ using namespace AMDGPU;

RegBankLegalizeHelper::RegBankLegalizeHelper(
MachineIRBuilder &B, const MachineUniformityInfo &MUI,
const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules)
: ST(B.getMF().getSubtarget<GCNSubtarget>()), B(B), MRI(*B.getMRI()),
MUI(MUI), RBI(RBI), RBLRules(RBLRules), IsWave32(ST.isWave32()),
const RegisterBankInfo &RBI, const TargetPassConfig &TPC,
const RegBankLegalizeRules &RBLRules)
: MF(B.getMF()), ST(MF.getSubtarget<GCNSubtarget>()), B(B),
MRI(*B.getMRI()), MUI(MUI), RBI(RBI), TPC(TPC), MORE(MF, nullptr),
RBLRules(RBLRules), IsWave32(ST.isWave32()),
SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}

void RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
const SetOfRulesForOpcode &RuleSet = RBLRules.getRulesForOpc(MI);
const RegBankLLTMapping &Mapping = RuleSet.findMappingForMI(MI, MRI, MUI);
bool RegBankLegalizeHelper::findRuleAndApplyMapping(MachineInstr &MI) {
const SetOfRulesForOpcode *RuleSet = RBLRules.getRulesForOpc(MI);
if (!RuleSet) {
reportGISelFailure(MF, TPC, MORE, "amdgpu-regbanklegalize",
"No AMDGPU RegBankLegalize rules defined for opcode",
MI);
return false;
}

const RegBankLLTMapping *Mapping = RuleSet->findMappingForMI(MI, MRI, MUI);
if (!Mapping) {
reportGISelFailure(MF, TPC, MORE, "amdgpu-regbanklegalize",
"AMDGPU RegBankLegalize: none of the rules defined with "
"'Any' for MI's opcode matched MI",
MI);
return false;
}

SmallSet<Register, 4> WaterfallSgprs;
unsigned OpIdx = 0;
if (Mapping.DstOpMapping.size() > 0) {
if (Mapping->DstOpMapping.size() > 0) {
B.setInsertPt(*MI.getParent(), std::next(MI.getIterator()));
applyMappingDst(MI, OpIdx, Mapping.DstOpMapping);
if (!applyMappingDst(MI, OpIdx, Mapping->DstOpMapping))
return false;
}
if (Mapping.SrcOpMapping.size() > 0) {
if (Mapping->SrcOpMapping.size() > 0) {
B.setInstr(MI);
applyMappingSrc(MI, OpIdx, Mapping.SrcOpMapping, WaterfallSgprs);
applyMappingSrc(MI, OpIdx, Mapping->SrcOpMapping, WaterfallSgprs);
}

lower(MI, Mapping, WaterfallSgprs);
lower(MI, *Mapping, WaterfallSgprs);
return true;
}

bool RegBankLegalizeHelper::executeInWaterfallLoop(
Expand Down Expand Up @@ -1055,7 +1073,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
}
}

void RegBankLegalizeHelper::applyMappingDst(
bool RegBankLegalizeHelper::applyMappingDst(
MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs) {
// Defs start from operand 0
Expand Down Expand Up @@ -1180,13 +1198,17 @@ void RegBankLegalizeHelper::applyMappingDst(
break;
}
case InvalidMapping: {
LLVM_DEBUG(dbgs() << "Instruction with Invalid mapping: "; MI.dump(););
llvm_unreachable("missing fast rule for MI");
reportGISelFailure(
MF, TPC, MORE, "amdgpu-regbanklegalize",
"AMDGPU RegBankLegalize: missing fast rule ('Div' or 'Uni') for", MI);
return false;
}
default:
llvm_unreachable("ID not supported");
}
}

return true;
}

void RegBankLegalizeHelper::applyMappingSrc(
Expand Down Expand Up @@ -1348,7 +1370,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
}
}

void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
bool RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);

Expand All @@ -1371,28 +1393,31 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
MI.getOperand(i).setReg(NewUse.getReg(0));
}

return;
return true;
}

// ALL divergent i1 phis should be already lowered and inst-selected into PHI
// with sgpr reg class and S1 LLT.
// ALL divergent i1 phis should have been lowered and inst-selected into PHI
// with sgpr reg class and S1 LLT in AMDGPUGlobalISelDivergenceLowering pass.
// Note: this includes divergent phis that don't require lowering.
if (Ty == LLT::scalar(1) && MUI.isDivergent(Dst)) {
LLVM_DEBUG(dbgs() << "Divergent S1 G_PHI: "; MI.dump(););
llvm_unreachable("Make sure to run AMDGPUGlobalISelDivergenceLowering "
"before RegBankLegalize to lower lane mask(vcc) phis");
reportGISelFailure(MF, TPC, MORE, "amdgpu-regbanklegalize",
"AMDGPU RegBankLegalize: Can't lower divergent S1 G_PHI",
MI);
return false;
}

// We accept all types that can fit in some register class.
// Uniform G_PHIs have all sgpr registers.
// Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
Ty == LLT::pointer(4, 64)) {
return;
return true;
}

LLVM_DEBUG(dbgs() << "G_PHI not handled: "; MI.dump(););
llvm_unreachable("type not supported");
reportGISelFailure(MF, TPC, MORE, "amdgpu-regbanklegalize",
"AMDGPU RegBankLegalize: type not supported for G_PHI",
MI);
return false;
}

[[maybe_unused]] static bool verifyRegBankOnOperands(MachineInstr &MI,
Expand Down
12 changes: 9 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
#include "AMDGPURegBankLegalizeRules.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"

namespace llvm {

Expand All @@ -27,11 +29,14 @@ namespace AMDGPU {
// to replace instruction. In other case InstApplyMethod will create new
// instruction(s).
class RegBankLegalizeHelper {
MachineFunction &MF;
const GCNSubtarget &ST;
MachineIRBuilder &B;
MachineRegisterInfo &MRI;
const MachineUniformityInfo &MUI;
const RegisterBankInfo &RBI;
const TargetPassConfig &TPC;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably shouldn't be spreading the use of TargetPassConfig any further; it doesn't exist in the new pass manager.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already a `// FIXME: Eliminate dependency on TargetPassConfig for NewPM transition` comment.
What should I do here, then: write a version of reportGISelFailure without the TPC argument?

MachineOptimizationRemarkEmitter MORE;
const RegBankLegalizeRules &RBLRules;
const bool IsWave32;
const RegisterBank *SgprRB;
Expand Down Expand Up @@ -79,12 +84,13 @@ class RegBankLegalizeHelper {
public:
RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI,
const RegisterBankInfo &RBI,
const TargetPassConfig &TPC,
const RegBankLegalizeRules &RBLRules);

void findRuleAndApplyMapping(MachineInstr &MI);
bool findRuleAndApplyMapping(MachineInstr &MI);

// Manual apply helpers.
void applyMappingPHI(MachineInstr &MI);
bool applyMappingPHI(MachineInstr &MI);
void applyMappingTrivial(MachineInstr &MI);

private:
Expand All @@ -97,7 +103,7 @@ class RegBankLegalizeHelper {

const RegisterBank *getRegBankFromID(RegBankLLTMappingApplyID ID);

void
bool
applyMappingDst(MachineInstr &MI, unsigned &OpIdx,
const SmallVectorImpl<RegBankLLTMappingApplyID> &MethodIDs);

Expand Down
27 changes: 11 additions & 16 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
return _;
}

const RegBankLLTMapping &
const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
const MachineRegisterInfo &MRI,
const MachineUniformityInfo &MUI) const {
Expand All @@ -260,17 +260,16 @@ SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

if (Slot != -1)
return MUI.isUniform(Reg) ? Uni[Slot] : Div[Slot];
return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
}

// Slow search for more complex rules.
for (const RegBankLegalizeRule &Rule : Rules) {
if (Rule.Predicate.match(MI, MUI, MRI))
return Rule.OperandMapping;
return &Rule.OperandMapping;
}

LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
llvm_unreachable("None of the rules defined for MI's opcode matched MI");
return nullptr;
}

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
Expand Down Expand Up @@ -353,27 +352,23 @@ RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode &
const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
auto IRAIt = IRulesAlias.find(IntrID);
if (IRAIt == IRulesAlias.end()) {
LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
llvm_unreachable("No rules defined for intrinsic opcode");
}
return IRules.at(IRAIt->second);
if (IRAIt == IRulesAlias.end())
return nullptr;
return &IRules.at(IRAIt->second);
}

auto GRAIt = GRulesAlias.find(Opc);
if (GRAIt == GRulesAlias.end()) {
LLVM_DEBUG(dbgs() << "MI: "; MI.dump(););
llvm_unreachable("No rules defined for generic opcode");
}
return GRules.at(GRAIt->second);
if (GRAIt == GRulesAlias.end())
return nullptr;
return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for predicate lambda that enables '&&', '||' and '!'.
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ class SetOfRulesForOpcode {
SetOfRulesForOpcode();
SetOfRulesForOpcode(FastRulesTypes FastTypes);

const RegBankLLTMapping &
const RegBankLLTMapping *
findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI,
const MachineUniformityInfo &MUI) const;

Expand Down Expand Up @@ -385,7 +385,7 @@ class RegBankLegalizeRules {
MRI = &_MRI;
};

const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const;
const SetOfRulesForOpcode *getRulesForOpc(MachineInstr &MI) const;
};

} // end namespace AMDGPU
Expand Down