Skip to content

Commit c6ccbb7

Browse files
committed
- Fold analysis into getRegAllocationHints
1 parent 7ad9eee commit c6ccbb7

File tree

7 files changed

+20
-144
lines changed

7 files changed

+20
-144
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -549,9 +549,6 @@ extern char &GCNRewritePartialRegUsesID;
549549
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
550550
extern char &AMDGPUWaitSGPRHazardsLegacyID;
551551

552-
void initializeAMDGPUMarkSGPRHazardRegsLegacyPass(PassRegistry &);
553-
extern char &AMDGPUMarkSGPRHazardRegsLegacyID;
554-
555552
namespace AMDGPU {
556553
enum TargetIndex {
557554
TI_CONSTDATA_START,

llvm/lib/Target/AMDGPU/AMDGPUMarkSGPRHazardRegs.cpp

Lines changed: 0 additions & 102 deletions
This file was deleted.

llvm/lib/Target/AMDGPU/AMDGPUMarkSGPRHazardRegs.h

Lines changed: 0 additions & 25 deletions
This file was deleted.

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
#include "AMDGPUIGroupLP.h"
2424
#include "AMDGPUISelDAGToDAG.h"
2525
#include "AMDGPUMacroFusion.h"
26-
#include "AMDGPUMarkSGPRHazardRegs.h"
2726
#include "AMDGPUPerfHintAnalysis.h"
2827
#include "AMDGPUPreloadKernArgProlog.h"
2928
#include "AMDGPURemoveIncompatibleFunctions.h"
@@ -568,7 +567,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
568567
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
569568
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
570569
initializeAMDGPUPreloadKernelArgumentsLegacyPass(*PR);
571-
initializeAMDGPUMarkSGPRHazardRegsLegacyPass(*PR);
572570
}
573571

574572
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1669,7 +1667,6 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
16691667

16701668
addPass(&GCNPreRALongBranchRegID);
16711669

1672-
addPass(&AMDGPUMarkSGPRHazardRegsLegacyID);
16731670
addPass(createSGPRAllocPass(true));
16741671

16751672
// Commit allocated register changes. This is mostly necessary because too

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@ add_llvm_target(AMDGPUCodeGen
8484
AMDGPUIGroupLP.cpp
8585
AMDGPUMCResourceInfo.cpp
8686
AMDGPUMarkLastScratchLoad.cpp
87-
AMDGPUMarkSGPRHazardRegs.cpp
8887
AMDGPUMIRFormatter.cpp
8988
AMDGPUPerfHintAnalysis.cpp
9089
AMDGPUPostLegalizerCombiner.cpp

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3864,10 +3864,26 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
38643864
if (!isSGPRClass(RC))
38653865
return BaseImplRetVal;
38663866

3867+
// Exit without an avoidance strategy
38673868
const unsigned Strategy = getSGPRHazardAvoidanceStrategy(MF);
38683869
if (!Strategy)
38693870
return BaseImplRetVal;
38703871

3872+
// Register has a hazard if it is SGPR used by VALU
3873+
DenseMap<Register, bool> HazardRegs;
3874+
auto HasSGPRHazard = [&MRI, TRI, &HazardRegs](Register Reg) {
3875+
const auto *RC = MRI.getRegClass(Reg);
3876+
if (!RC || !TRI->isSGPRClass(RC))
3877+
return false;
3878+
if (!HazardRegs.contains(Reg)) {
3879+
HazardRegs[Reg] = llvm::any_of(
3880+
MRI.reg_nodbg_operands(Reg), [](const MachineOperand &MO) {
3881+
return MO.isUse() && SIInstrInfo::isVALU(*MO.getParent());
3882+
});
3883+
}
3884+
return HazardRegs[Reg];
3885+
};
3886+
38713887
SmallSet<MCPhysReg, 4> CopyHints;
38723888
CopyHints.insert(Hints.begin(), Hints.end());
38733889

@@ -3883,7 +3899,7 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
38833899

38843900
// V1: simply reverse allocation order, mean 23% reduction in hazards
38853901
if (Strategy == 1) {
3886-
if (FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
3902+
if (HasSGPRHazard(VirtReg)) {
38873903
for (MCPhysReg PhysReg : reverse(Order))
38883904
AddHint(PhysReg);
38893905
} else {
@@ -3906,8 +3922,7 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
39063922
LiveIntervalUnion &LIU = LiveUnions[Unit];
39073923
for (const LiveInterval *LI : LIU.getMap()) {
39083924
Intervals.insert(LI);
3909-
if (FuncInfo->checkFlag(LI->reg(),
3910-
AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
3925+
if (HasSGPRHazard(LI->reg())) {
39113926
IsHazard = true;
39123927
// Break here as we only care about interval count for non-hazard regs
39133928
break;
@@ -3927,8 +3942,7 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
39273942
// V2: weight the entire order based on hazard free usage, mean 30% reduction
39283943
// in hazards
39293944
if (Strategy == 2) {
3930-
bool VRegIsHazard =
3931-
FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG);
3945+
bool VRegIsHazard = HasSGPRHazard(VirtReg);
39323946
SmallVector<MCPhysReg> NewOrder(Order);
39333947
std::sort(NewOrder.begin(), NewOrder.end(), [&](MCPhysReg A, MCPhysReg B) {
39343948
return VRegIsHazard ? IntervalCount[A] < IntervalCount[B]
@@ -3969,7 +3983,7 @@ bool SIRegisterInfo::getRegAllocationHints(Register VirtReg,
39693983
}
39703984
}
39713985

3972-
if (FuncInfo->checkFlag(VirtReg, AMDGPU::VirtRegFlag::SGPR_HAZARD_REG)) {
3986+
if (HasSGPRHazard(VirtReg)) {
39733987
// Reorder allocations based on usage, so least used will be reused first.
39743988
// This means least used regs are touched by hazards first.
39753989
std::sort(Allocated.begin(), Allocated.end(),

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,6 @@
350350
; GCN-O1-NEXT: SI Whole Quad Mode
351351
; GCN-O1-NEXT: SI optimize exec mask operations pre-RA
352352
; GCN-O1-NEXT: AMDGPU Pre-RA Long Branch Reg
353-
; GCN-O1-NEXT: AMDGPU Mark Hazard SGPRs
354353
; GCN-O1-NEXT: Machine Natural Loop Construction
355354
; GCN-O1-NEXT: Machine Block Frequency Analysis
356355
; GCN-O1-NEXT: Debug Variable Analysis
@@ -661,7 +660,6 @@
661660
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
662661
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
663662
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
664-
; GCN-O1-OPTS-NEXT: AMDGPU Mark Hazard SGPRs
665663
; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction
666664
; GCN-O1-OPTS-NEXT: Machine Block Frequency Analysis
667665
; GCN-O1-OPTS-NEXT: Debug Variable Analysis
@@ -978,7 +976,6 @@
978976
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
979977
; GCN-O2-NEXT: SI Form memory clauses
980978
; GCN-O2-NEXT: AMDGPU Pre-RA Long Branch Reg
981-
; GCN-O2-NEXT: AMDGPU Mark Hazard SGPRs
982979
; GCN-O2-NEXT: Machine Natural Loop Construction
983980
; GCN-O2-NEXT: Machine Block Frequency Analysis
984981
; GCN-O2-NEXT: Debug Variable Analysis
@@ -1308,7 +1305,6 @@
13081305
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
13091306
; GCN-O3-NEXT: SI Form memory clauses
13101307
; GCN-O3-NEXT: AMDGPU Pre-RA Long Branch Reg
1311-
; GCN-O3-NEXT: AMDGPU Mark Hazard SGPRs
13121308
; GCN-O3-NEXT: Machine Natural Loop Construction
13131309
; GCN-O3-NEXT: Machine Block Frequency Analysis
13141310
; GCN-O3-NEXT: Debug Variable Analysis

0 commit comments

Comments
 (0)