Skip to content

Commit b293fd3

Browse files
committed
[AMDGPU] Add option to bias SGPR allocation to reduce read hazards
- Scan for potential hazards in virtual registers before SGPR allocation. - Use this data to build new allocation order via allocation hints.
1 parent e24bcfb commit b293fd3

File tree

10 files changed

+544
-1
lines changed

10 files changed

+544
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,9 @@ extern char &GCNRewritePartialRegUsesID;
478478
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
479479
extern char &AMDGPUWaitSGPRHazardsLegacyID;
480480

481+
void initializeAMDGPUMarkSGPRHazardRegsLegacyPass(PassRegistry &);
482+
extern char &AMDGPUMarkSGPRHazardRegsLegacyID;
483+
481484
namespace AMDGPU {
482485
enum TargetIndex {
483486
TI_CONSTDATA_START,
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//===- AMDGPUMarkSGPRHazardRegs.cpp - Annotate SGPRs used by VALU ---------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file Pass to mark SGPRs used by VALU.
10+
/// Marks can be used during register allocation to reduce hazards.
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "AMDGPUMarkSGPRHazardRegs.h"
15+
#include "AMDGPU.h"
16+
#include "GCNSubtarget.h"
17+
#include "SIMachineFunctionInfo.h"
18+
#include "llvm/CodeGen/MachineFunctionPass.h"
19+
#include "llvm/CodeGen/RegisterClassInfo.h"
20+
#include "llvm/CodeGen/VirtRegMap.h"
21+
#include "llvm/InitializePasses.h"
22+
23+
using namespace llvm;
24+
25+
#define DEBUG_TYPE "amdgpu-mark-sgpr-hazard-regs"
26+
27+
namespace {
28+
29+
class AMDGPUMarkSGPRHazardRegs {
30+
public:
31+
AMDGPUMarkSGPRHazardRegs() {}
32+
bool run(MachineFunction &MF);
33+
};
34+
35+
class AMDGPUMarkSGPRHazardRegsLegacy : public MachineFunctionPass {
36+
public:
37+
static char ID;
38+
39+
AMDGPUMarkSGPRHazardRegsLegacy() : MachineFunctionPass(ID) {}
40+
41+
bool runOnMachineFunction(MachineFunction &MF) override {
42+
if (skipFunction(MF.getFunction()))
43+
return false;
44+
return AMDGPUMarkSGPRHazardRegs().run(MF);
45+
}
46+
47+
void getAnalysisUsage(AnalysisUsage &AU) const override {
48+
AU.setPreservesAll();
49+
MachineFunctionPass::getAnalysisUsage(AU);
50+
}
51+
};
52+
53+
} // End anonymous namespace.
54+
55+
/// Set the SGPR_HAZARD_REG flag on every SGPR virtual register of \p MF that
/// has at least one non-debug use operand in a VALU instruction.
///
/// Nothing is done (and false is returned) when the subtarget does not have
/// the VALU read SGPR hazard or when no hazard avoidance strategy is selected
/// for the function. Otherwise the function returns true.
bool AMDGPUMarkSGPRHazardRegs::run(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  if (!ST.hasVALUReadSGPRHazard() || !TRI->getSGPRHazardAvoidanceStrategy(MF))
    return false;

  LLVM_DEBUG(dbgs() << "AMDGPUMarkSGPRHazardRegs: function " << MF.getName()
                    << "\n");

  const MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const unsigned NumVRegs = RegInfo.getNumVirtRegs();
  for (unsigned Idx = 0; Idx != NumVRegs; ++Idx) {
    const Register VReg = Register::index2VirtReg(Idx);

    // Registers without any non-debug operand cannot take part in a hazard.
    if (RegInfo.reg_nodbg_empty(VReg))
      continue;

    const TargetRegisterClass *RC = RegInfo.getRegClass(VReg);
    if (!RC || !TRI->isSGPRClass(RC))
      continue;

    // A single VALU read is enough to mark the register.
    const bool ReadByVALU = llvm::any_of(
        RegInfo.reg_nodbg_operands(VReg), [](const MachineOperand &MO) {
          return SIInstrInfo::isVALU(*MO.getParent()) && MO.isUse();
        });
    if (ReadByVALU)
      MFI->setFlag(VReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG);
  }

  return true;
}
88+
89+
INITIALIZE_PASS(AMDGPUMarkSGPRHazardRegsLegacy, DEBUG_TYPE,
90+
"AMDGPU Mark Hazard SGPRs", false, false)
91+
92+
char AMDGPUMarkSGPRHazardRegsLegacy::ID = 0;
93+
94+
char &llvm::AMDGPUMarkSGPRHazardRegsLegacyID =
95+
AMDGPUMarkSGPRHazardRegsLegacy::ID;
96+
97+
/// New pass manager entry point. Runs the shared implementation on \p MF and
/// reports all analyses as preserved; the analysis manager is not consulted.
PreservedAnalyses
AMDGPUMarkSGPRHazardRegsPass::run(MachineFunction &MF,
                                  MachineFunctionAnalysisManager &MFAM) {
  AMDGPUMarkSGPRHazardRegs Impl;
  Impl.run(MF);
  return PreservedAnalyses::all();
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
//===--- AMDGPUMarkSGPRHazardRegs.h -----------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// The include guard is derived from the file name
// (AMDGPUMarkSGPRHazardRegs.h), as required by the LLVM header guard
// convention.
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMARKSGPRHAZARDREGS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMARKSGPRHAZARDREGS_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {

/// New pass manager pass that marks SGPRs used by VALU instructions so the
/// marks can be used during register allocation to reduce hazards.
class AMDGPUMarkSGPRHazardRegsPass
    : public PassInfoMixin<AMDGPUMarkSGPRHazardRegsPass> {
public:
  /// Runs the marking on \p MF; all analyses are reported as preserved.
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM);
};

} // namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMARKSGPRHAZARDREGS_H

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "AMDGPUIGroupLP.h"
2323
#include "AMDGPUISelDAGToDAG.h"
2424
#include "AMDGPUMacroFusion.h"
25+
#include "AMDGPUMarkSGPRHazardRegs.h"
2526
#include "AMDGPUOpenCLEnqueuedBlockLowering.h"
2627
#include "AMDGPUPerfHintAnalysis.h"
2728
#include "AMDGPURemoveIncompatibleFunctions.h"
@@ -560,6 +561,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
560561
initializeGCNRegPressurePrinterPass(*PR);
561562
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
562563
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
564+
initializeAMDGPUMarkSGPRHazardRegsLegacyPass(*PR);
563565
}
564566

565567
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1613,6 +1615,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
16131615

16141616
addPass(&GCNPreRALongBranchRegID);
16151617

1618+
addPass(&AMDGPUMarkSGPRHazardRegsLegacyID);
16161619
addPass(createSGPRAllocPass(true));
16171620

16181621
// Commit allocated register changes. This is mostly necessary because too

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ add_llvm_target(AMDGPUCodeGen
8585
AMDGPUMarkLastScratchLoad.cpp
8686
AMDGPUMIRFormatter.cpp
8787
AMDGPUOpenCLEnqueuedBlockLowering.cpp
88+
AMDGPUMarkSGPRHazardRegs.cpp
8889
AMDGPUPerfHintAnalysis.cpp
8990
AMDGPUPostLegalizerCombiner.cpp
9091
AMDGPUPreLegalizerCombiner.cpp

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,7 @@ namespace VirtRegFlag {
10661066
enum Register_Flag : uint8_t {
  // Register operand in a whole-wave mode operation.
  WWM_REG = 1 << 0,
  // SGPR virtual register read by a VALU instruction; set by the
  // AMDGPUMarkSGPRHazardRegs pass and consumed by the SGPR allocation hints.
  SGPR_HAZARD_REG = 1 << 1,
};
10701071

10711072
} // namespace VirtRegFlag

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 169 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,15 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14+
#include "SIRegisterInfo.h"
1415
#include "AMDGPU.h"
1516
#include "AMDGPURegisterBankInfo.h"
1617
#include "GCNSubtarget.h"
1718
#include "MCTargetDesc/AMDGPUInstPrinter.h"
1819
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
1920
#include "SIMachineFunctionInfo.h"
20-
#include "SIRegisterInfo.h"
2121
#include "llvm/CodeGen/LiveIntervals.h"
22+
#include "llvm/CodeGen/LiveRegMatrix.h"
2223
#include "llvm/CodeGen/LiveRegUnits.h"
2324
#include "llvm/CodeGen/MachineDominators.h"
2425
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -35,6 +36,10 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
3536
cl::ReallyHidden,
3637
cl::init(true));
3738

39+
// Selects how SIRegisterInfo::getRegAllocationHints biases SGPR allocation:
//   0 - no hazard specific hints (default)
//   1 - reverse the allocation order for hazard registers
//   2 - order registers by the number of live intervals assigned to them
//   3 - partition the order by allocation state and hazard pairs
static cl::opt<unsigned> SGPRHazardAvoidanceStrategy(
    "amdgpu-sgpr-hazard-regalloc",
    cl::desc("Register allocation strategy to reduce SGPR read hazards"),
    cl::init(0), cl::ReallyHidden);
42+
3843
std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
3944
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
4045

@@ -3904,3 +3909,166 @@ SIRegisterInfo::getVRegFlagsOfReg(Register Reg,
39043909
RegFlags.push_back("WWM_REG");
39053910
return RegFlags;
39063911
}
3912+
3913+
unsigned SIRegisterInfo::getSGPRHazardAvoidanceStrategy(
3914+
const MachineFunction &MF) const {
3915+
if (SGPRHazardAvoidanceStrategy.getNumOccurrences()) {
3916+
return SGPRHazardAvoidanceStrategy;
3917+
} else {
3918+
return MF.getFunction().getFnAttributeAsParsedInteger(
3919+
"amdgpu-sgpr-hazard-regalloc", 0);
3920+
}
3921+
}
3922+
3923+
/// Extend the generic allocation hints with an SGPR specific order that tries
/// to reduce VALU read SGPR hazards.
///
/// The generic hints are computed first and returned unchanged when \p VRM is
/// null, the subtarget has no VALU read SGPR hazard, \p VirtReg is not an
/// SGPR, no strategy is selected, or the selected strategy needs \p Matrix and
/// none was supplied. Otherwise every non-reserved register of \p Order that
/// is not already hinted is appended to \p Hints in the strategy specific
/// order and true is returned.
///
/// \param VirtReg virtual register being allocated.
/// \param Order   allocation order of the register class of \p VirtReg.
/// \param Hints   in/out list of hinted physical registers.
/// \param Matrix  current assignments; required by strategies 2 and 3.
bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
                                           ArrayRef<MCPhysReg> Order,
                                           SmallVectorImpl<MCPhysReg> &Hints,
                                           const MachineFunction &MF,
                                           const VirtRegMap *VRM,
                                           const LiveRegMatrix *Matrix) const {
  const bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
      VirtReg, Order, Hints, MF, VRM, Matrix);
  if (!VRM)
    return BaseImplRetVal;

  // Only use hinting to reduce SGPR read hazards when required.
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasVALUReadSGPRHazard())
    return BaseImplRetVal;

  // Only treat SGPRs.
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const MachineRegisterInfo *MRI = &MF.getRegInfo();
  const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
  if (!isSGPRClass(RC))
    return BaseImplRetVal;

  const unsigned Strategy = getSGPRHazardAvoidanceStrategy(MF);
  if (!Strategy)
    return BaseImplRetVal;

  // Strategies other than 1 inspect the current assignments. Without a live
  // matrix there is nothing to inspect, so keep the generic hints instead of
  // dereferencing a null pointer.
  if (Strategy != 1 && !Matrix)
    return BaseImplRetVal;

  const bool VRegIsHazard =
      FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG);

  // Hints produced by the generic implementation keep their priority and must
  // not be duplicated.
  SmallSet<MCPhysReg, 4> CopyHints;
  CopyHints.insert(Hints.begin(), Hints.end());

  auto AddHint = [&](MCPhysReg PhysReg) {
    if (CopyHints.contains(PhysReg) || MRI->isReserved(PhysReg))
      return;
    Hints.push_back(PhysReg);
  };
  auto AddHints = [&](ArrayRef<MCPhysReg> Regs) {
    for (MCPhysReg PhysReg : Regs)
      AddHint(PhysReg);
  };

  // V1: simply reverse allocation order, mean 23% reduction in hazards
  if (Strategy == 1) {
    if (VRegIsHazard) {
      for (MCPhysReg PhysReg : reverse(Order))
        AddHint(PhysReg);
    } else {
      AddHints(Order);
    }
    return true;
  }

  // Build set of current hazard pairs from live matrix
  auto *LiveUnions = const_cast<LiveRegMatrix *>(Matrix)->getLiveUnions();

  DenseMap<MCPhysReg, unsigned> IntervalCount;
  std::bitset<64> HazardPairs;

  for (MCPhysReg PhysReg : Order) {
    SmallSet<const LiveInterval *, 4> Intervals;
    bool IsHazard = false;
    for (auto Unit : regunits(PhysReg)) {
      LiveIntervalUnion &LIU = LiveUnions[Unit];
      // The map is keyed by segment, so one interval may be visited several
      // times; the set keeps the count per interval.
      for (const LiveInterval *LI : LIU.getMap()) {
        Intervals.insert(LI);
        if (FuncInfo->checkFlag(LI->reg(),
                                AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
          IsHazard = true;
          // Break here as we only care about interval count for non-hazard regs
          break;
        }
      }
      if (IsHazard)
        break;
    }
    if (IsHazard) {
      const unsigned PairN = getEncodingValue(PhysReg) >> 1;
      if (PairN < HazardPairs.size())
        HazardPairs.set(PairN);
    }
    IntervalCount[PhysReg] = Intervals.size();
  }

  // Orders registers by ascending number of assigned live intervals.
  auto FewerIntervals = [&](MCPhysReg A, MCPhysReg B) {
    return IntervalCount.lookup(A) < IntervalCount.lookup(B);
  };

  // V2: weight the entire order based on hazard free usage, mean 30% reduction
  // in hazards
  if (Strategy == 2) {
    SmallVector<MCPhysReg> NewOrder(Order);
    // Ties are very common, so use a stable sort to keep the resulting order
    // (and therefore code generation) independent of the host standard
    // library.
    llvm::stable_sort(NewOrder, [&](MCPhysReg A, MCPhysReg B) {
      return VRegIsHazard ? FewerIntervals(A, B) : FewerIntervals(B, A);
    });
    AddHints(NewOrder);
    return true;
  }

  // V3: complex partitioning, mean 35% reduction in hazards
  assert(Strategy == 3 && "unknown SGPR hazard avoidance strategy");

  // Every register in Order belongs to RC, so they all cover the same number
  // of 32-bit SGPRs.
  const unsigned RegBits = getRegSizeInBits(*RC);
  const unsigned NumDWords = RegBits > 32 ? RegBits / 32 : 1;

  // Returns true if any SGPR pair covered by PhysReg is a known hazard pair.
  // The pairs are derived from the hardware encoding of the register itself;
  // register units are not registers and have no encoding.
  auto TouchesHazardPair = [&](MCPhysReg PhysReg) {
    const unsigned FirstIdx = getEncodingValue(PhysReg);
    const unsigned LastPair = (FirstIdx + NumDWords - 1) >> 1;
    for (unsigned PairN = FirstIdx >> 1;
         PairN <= LastPair && PairN < HazardPairs.size(); ++PairN) {
      if (HazardPairs[PairN])
        return true;
    }
    return false;
  };

  // Partition the allocation order based on hazards
  SmallVector<MCPhysReg> Unallocated, UnallocatedWithHazard;
  SmallVector<MCPhysReg> Allocated, AllocatedWithHazard;

  for (MCPhysReg PhysReg : Order) {
    const Register VReg = Matrix->getOneVReg(PhysReg);
    const bool HasHazard = TouchesHazardPair(PhysReg);
    if (VReg == MCRegister::NoRegister) {
      if (HasHazard)
        UnallocatedWithHazard.push_back(PhysReg);
      else
        Unallocated.push_back(PhysReg);
    } else {
      if (HasHazard)
        AllocatedWithHazard.push_back(PhysReg);
      else
        Allocated.push_back(PhysReg);
    }
  }

  if (VRegIsHazard) {
    // Reorder allocations based on usage, so least used will be reused first.
    // This means least used regs are touched by hazards first.
    llvm::stable_sort(Allocated, FewerIntervals);
    // Reverse order of allocations to try to keep hazards away - yes it helps.
    std::reverse(Unallocated.begin(), Unallocated.end());

    AddHints(AllocatedWithHazard);
    AddHints(UnallocatedWithHazard);
    AddHints(Unallocated);
    AddHints(Allocated);
  } else {
    AddHints(Allocated);
    AddHints(Unallocated);
    AddHints(UnallocatedWithHazard);
    AddHints(AllocatedWithHazard);
  }

  return true;
}

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,13 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
465465

466466
SmallVector<StringLiteral>
467467
getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
468+
469+
/// Returns the SGPR hazard avoidance strategy for \p MF: the value of the
/// "amdgpu-sgpr-hazard-regalloc" command line option when given, otherwise
/// the value of the function attribute of the same name (0 if absent).
unsigned getSGPRHazardAvoidanceStrategy(const MachineFunction &MF) const;
470+
471+
/// Allocation hint hook. On subtargets with the VALU read SGPR hazard and a
/// non-zero hazard avoidance strategy, SGPR virtual registers get additional
/// hints that bias the allocation order to reduce read hazards.
bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
472+
SmallVectorImpl<MCPhysReg> &Hints,
473+
const MachineFunction &MF, const VirtRegMap *VRM,
474+
const LiveRegMatrix *Matrix) const override;
468475
};
469476

470477
namespace AMDGPU {

0 commit comments

Comments
 (0)