@@ -11,14 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIRegisterInfo.h"
 #include "AMDGPU.h"
 #include "AMDGPURegisterBankInfo.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUInstPrinter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
-#include "SIRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -35,6 +36,10 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
     cl::ReallyHidden,
     cl::init(true));
 
+static cl::opt<unsigned> SGPRHazardAvoidanceStrategy(
+    "amdgpu-sgpr-hazard-regalloc", cl::init(0), cl::ReallyHidden,
+    cl::desc("Register allocation strategy to reduce SGPR read hazards"));
+
 std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
 std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
 
@@ -3904,3 +3909,166 @@ SIRegisterInfo::getVRegFlagsOfReg(Register Reg,
   RegFlags.push_back("WWM_REG");
   return RegFlags;
 }
+
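+// Resolve the hazard-avoidance strategy. Precedence: an explicit
+// -amdgpu-sgpr-hazard-regalloc=N on the command line overrides the
+// per-function "amdgpu-sgpr-hazard-regalloc" attribute; with neither
+// set, the default of 0 disables the extra hinting below.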
+unsigned SIRegisterInfo::getSGPRHazardAvoidanceStrategy(
+    const MachineFunction &MF) const {
+  if (SGPRHazardAvoidanceStrategy.getNumOccurrences())
+    return SGPRHazardAvoidanceStrategy;
+  return MF.getFunction().getFnAttributeAsParsedInteger(
+      "amdgpu-sgpr-hazard-regalloc", 0);
+}
+
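+// Augment the generic allocation-order hints with orderings that reduce
+// VALU-read-SGPR hazards. Returning true tells the allocator to use only
+// these hints, in this order.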
+bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
+                                           ArrayRef<MCPhysReg> Order,
+                                           SmallVectorImpl<MCPhysReg> &Hints,
+                                           const MachineFunction &MF,
+                                           const VirtRegMap *VRM,
+                                           const LiveRegMatrix *Matrix) const {
+  bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
+      VirtReg, Order, Hints, MF, VRM, Matrix);
+  if (!VRM)
+    return BaseImplRetVal;
+
+  // Only use hinting to reduce SGPR read hazards when required.
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  if (!ST.hasVALUReadSGPRHazard())
+    return BaseImplRetVal;
+
+  // Only SGPR classes are affected by this hazard.
+  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const auto *RC = MRI->getRegClass(VirtReg);
+  if (!isSGPRClass(RC))
+    return BaseImplRetVal;
+
+  const unsigned Strategy = getSGPRHazardAvoidanceStrategy(MF);
+  if (!Strategy)
+    return BaseImplRetVal;
+
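+  // Seed the dedup set with the copy hints the base implementation already
+  // produced; AddHint then appends further candidates, skipping duplicates
+  // and reserved registers.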
+  SmallSet<MCPhysReg, 4> CopyHints;
+  CopyHints.insert(Hints.begin(), Hints.end());
+
+  auto AddHint = [&](MCPhysReg PhysReg) {
+    if (CopyHints.contains(PhysReg) || MRI->isReserved(PhysReg))
+      return;
+    Hints.push_back(PhysReg);
+  };
+  auto AddHints = [&](ArrayRef<MCPhysReg> Regs) {
+    for (MCPhysReg PhysReg : Regs)
+      AddHint(PhysReg);
+  };
+
+  // V1: simply reverse the allocation order for hazard vregs; gives a mean
+  // 23% reduction in hazards.
+  if (Strategy == 1) {
+    if (FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
+      for (MCPhysReg PhysReg : reverse(Order))
+        AddHint(PhysReg);
+    } else {
+      for (MCPhysReg PhysReg : Order)
+        AddHint(PhysReg);
+    }
+    return true;
+  }
+
+  // Build the set of current hazard pairs from the live register matrix.
+  auto *LiveUnions = const_cast<LiveRegMatrix *>(Matrix)->getLiveUnions();
+  const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+  DenseMap<MCPhysReg, unsigned> IntervalCount;
+  std::bitset<64> HazardPairs;
+
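+  // For each candidate register, count the distinct live intervals assigned
+  // to its regunits and note whether any of them is flagged as a hazard
+  // source. Hazard registers are recorded at SGPR-pair granularity
+  // (encoding value >> 1), so a 64-bit set covers the pairs of s[0:127].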
+  for (MCPhysReg PhysReg : Order) {
+    SmallSet<const LiveInterval *, 4> Intervals;
+    bool IsHazard = false;
+    for (auto Unit : TRI->regunits(PhysReg)) {
+      LiveIntervalUnion &LIU = LiveUnions[Unit];
+      for (const LiveInterval *LI : LIU.getMap()) {
+        Intervals.insert(LI);
+        if (FuncInfo->checkFlag(LI->reg(),
+                                AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
+          IsHazard = true;
+          // Stop scanning; the interval count only matters for
+          // non-hazard registers.
+          break;
+        }
+      }
+      if (IsHazard)
+        break;
+    }
+    if (IsHazard) {
+      unsigned PairN = TRI->getEncodingValue(PhysReg) >> 1;
+      if (PairN <= 63)
+        HazardPairs.set(PairN);
+    }
+    IntervalCount[PhysReg] = Intervals.size();
+  }
+
+  // V2: weight the entire order by hazard-free usage; gives a mean 30%
+  // reduction in hazards. Hazard vregs prefer the least-used registers,
+  // while hazard-free vregs pack into the most-used ones, keeping the two
+  // apart.
+  if (Strategy == 2) {
+    bool VRegIsHazard =
+        FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG);
+    SmallVector<MCPhysReg> NewOrder(Order);
+    std::sort(NewOrder.begin(), NewOrder.end(), [&](MCPhysReg A, MCPhysReg B) {
+      return VRegIsHazard ? IntervalCount[A] < IntervalCount[B]
+                          : IntervalCount[B] < IntervalCount[A];
+    });
+    AddHints(NewOrder);
+    return true;
+  }
+
+  // V3: partition the order by allocation state and hazard state; gives a
+  // mean 35% reduction in hazards.
+  assert(Strategy == 3);
+
+  // Partition the allocation order into four buckets:
+  // (un)allocated x (with/without an existing hazard pair).
+  SmallVector<MCPhysReg> Unallocated, UnallocatedWithHazard;
+  SmallVector<MCPhysReg> Allocated, AllocatedWithHazard;
+
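+  // A register has a hazard if any of its regunits lands in a pair that is
+  // already occupied by a hazard vreg.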
+  for (MCPhysReg PhysReg : Order) {
+    Register VReg = Matrix->getOneVReg(PhysReg);
+    bool HasHazard = false;
+    // TODO: the regunit scan could be skipped for plain SGPR32/SGPR64
+    // classes.
+    for (auto Unit : TRI->regunits(PhysReg)) {
+      unsigned PairN = TRI->getEncodingValue(Unit) >> 1;
+      if (PairN <= 63 && HazardPairs[PairN]) {
+        HasHazard = true;
+        break;
+      }
+    }
+    if (VReg == MCRegister::NoRegister) {
+      if (HasHazard)
+        UnallocatedWithHazard.push_back(PhysReg);
+      else
+        Unallocated.push_back(PhysReg);
+    } else {
+      if (HasHazard)
+        AllocatedWithHazard.push_back(PhysReg);
+      else
+        Allocated.push_back(PhysReg);
+    }
+  }
+
+  if (FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
+    // Reorder allocated registers by usage, so the least used are reused
+    // first and are the first to be touched by hazards.
+    std::sort(Allocated.begin(), Allocated.end(),
+              [&](MCPhysReg A, MCPhysReg B) {
+                return IntervalCount[A] < IntervalCount[B];
+              });
+    // Reverse the unallocated registers to keep fresh allocations away from
+    // existing hazards; empirically this helps.
+    std::reverse(Unallocated.begin(), Unallocated.end());
+
+    AddHints(AllocatedWithHazard);
+    AddHints(UnallocatedWithHazard);
+    AddHints(Unallocated);
+    AddHints(Allocated);
+  } else {
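+    // Hazard-free vregs take the opposite preference, steering clear of
+    // hazard pairs for as long as possible.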
+    AddHints(Allocated);
+    AddHints(Unallocated);
+    AddHints(UnallocatedWithHazard);
+    AddHints(AllocatedWithHazard);
+  }
+
+  return true;
+}
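
Usage note: the strategy can be selected per function via the IR attribute
read above, or globally with the hidden llc flag. A minimal sketch, assuming
a subtarget where GCNSubtarget::hasVALUReadSGPRHazard() returns true (the
gfx1200 cpu below is an assumption):

    ; strategy V3 for this kernel via the function attribute
    define amdgpu_kernel void @k() "amdgpu-sgpr-hazard-regalloc"="3" {
      ret void
    }
    ; a command-line override takes precedence over the attribute, e.g.:
    ;   llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-sgpr-hazard-regalloc=2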