Skip to content

Commit 62f61c7

Browse files
author
Salinas, David
authored
[AMDGPU] Rewrite GFX12 SGPR hazard handling to dedicated pass (llvm#118750) (llvm#897)
2 parents 7cd1261 + 590d1d9 commit 62f61c7

File tree

98 files changed

+2951
-2095
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+2951
-2095
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,20 @@ The AMDGPU backend supports the following LLVM IR attributes.
17101710
as hidden. Hidden arguments are managed by the compiler and are not part of
17111711
the explicit arguments supplied by the user.
17121712

1713+
"amdgpu-sgpr-hazard-wait" Disables SGPR hazard wait insertion if set to 0.
1714+
Exists for testing performance impact of SGPR hazard waits only.
1715+
1716+
"amdgpu-sgpr-hazard-boundary-cull" Enable insertion of SGPR hazard cull sequences at function call boundaries.
1717+
Cull sequence reduces future hazard waits, but has a performance cost.
1718+
1719+
"amdgpu-sgpr-hazard-mem-wait-cull" Enable insertion of SGPR hazard cull sequences before memory waits.
1720+
Cull sequence reduces future hazard waits, but has a performance cost.
1721+
Attempts to amortize the cost by overlapping with memory accesses.
1722+
1723+
"amdgpu-sgpr-hazard-mem-wait-cull-threshold"
1724+
Sets the number of active SGPR hazards that must be present before
1725+
inserting a cull sequence at a memory wait.
1726+
17131727
======================================= ==========================================================
17141728

17151729
Calling Conventions

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,9 @@ void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
467467
void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
468468
extern char &GCNRewritePartialRegUsesID;
469469

470+
void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
471+
extern char &AMDGPUWaitSGPRHazardsLegacyID;
472+
470473
namespace AMDGPU {
471474
enum TargetIndex {
472475
TI_CONSTDATA_START,

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "AMDGPUTargetObjectFile.h"
3030
#include "AMDGPUTargetTransformInfo.h"
3131
#include "AMDGPUUnifyDivergentExitNodes.h"
32+
#include "AMDGPUWaitSGPRHazards.h"
3233
#include "GCNDPPCombine.h"
3334
#include "GCNIterativeScheduler.h"
3435
#include "GCNSchedStrategy.h"
@@ -550,6 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
550551
initializeGCNRewritePartialRegUsesPass(*PR);
551552
initializeGCNRegPressurePrinterPass(*PR);
552553
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
554+
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
553555
}
554556

555557
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1715,6 +1717,8 @@ void GCNPassConfig::addPreEmitPass() {
17151717
// cases.
17161718
addPass(&PostRAHazardRecognizerID);
17171719

1720+
addPass(&AMDGPUWaitSGPRHazardsLegacyID);
1721+
17181722
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
17191723
addPass(&AMDGPUInsertDelayAluID);
17201724

0 commit comments

Comments
 (0)