diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 856b5eb359c49..1167465444f12 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -78,6 +78,7 @@
 #include "llvm/CodeGen/MachineLICM.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocFast.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -192,3 +193,21 @@
+static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM(
+    "sgpr-regalloc-npm", cl::Hidden,
+    cl::desc("Register allocator to use for SGPRs in new pass "
+             "manager"),
+    cl::init(RegAllocType::Default));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM(
+    "vgpr-regalloc-npm", cl::Hidden,
+    cl::desc("Register allocator to use for VGPRs in new pass "
+             "manager"),
+    cl::init(RegAllocType::Default));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM(
+    "wwm-regalloc-npm", cl::Hidden,
+    cl::desc("Register allocator to use for WWM registers in "
+             "new pass manager"),
+    cl::init(RegAllocType::Default));
+
 static void initializeDefaultSGPRRegisterAllocatorOnce() {
   RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
 
@@ -2141,6 +2160,121 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
   addPass(SIShrinkInstructionsPass());
 }
 
+static const char NPMRegAllocOptNotSupportedMessage[] =
+    "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+    "-wwm-regalloc-npm, "
+    "and -vgpr-regalloc-npm";
+
+/// Build the options with which \p RegAllocPassT is run for \p Phase: the
+/// phase's register filter and its name. RegAllocFastPass::Options also takes
+/// a ClearVirtRegs flag, which is set only for the VGPR phase.
+template <typename RegAllocPassT>
+typename RegAllocPassT::Options
+AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const {
+#define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs)                            \
+  [&]() {                                                                      \
+    if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>) {           \
+      return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs};       \
+    } else {                                                                   \
+      return typename RegAllocPassT::Options{FilterFunc, Name};                \
+    }                                                                          \
+  }()
+
+  switch (Phase) {
+  case RegAllocPhase::SGPR:
+    return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
+  case RegAllocPhase::WWM:
+    return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
+  case RegAllocPhase::VGPR:
+    return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
+  }
+
+  llvm_unreachable("invalid phase value");
+#undef RA_OPTIONS
+}
+
+/// Add the allocator chosen for \p Phase by its -{phase}-regalloc-npm option;
+/// \p RegAllocPassT is used when that option is Unset or Default.
+template <typename RegAllocPassT>
+void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
+                                           RegAllocPhase Phase) const {
+  RegAllocType RAType;
+  // Read the appropriate phase's regalloc type.
+  switch (Phase) {
+  case RegAllocPhase::SGPR:
+    RAType = SGPRRegAllocTypeNPM;
+    break;
+  case RegAllocPhase::WWM:
+    RAType = WWMRegAllocTypeNPM;
+    break;
+  case RegAllocPhase::VGPR:
+    RAType = VGPRRegAllocTypeNPM;
+    break;
+  }
+
+  // Construct the pass with the appropriate options.
+  switch (RAType) {
+  case RegAllocType::Greedy:
+    addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase)));
+    return;
+  case RegAllocType::Fast:
+    addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase)));
+    return;
+  case RegAllocType::Unset:
+  case RegAllocType::Default:
+    addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase)));
+    return;
+  default:
+    report_fatal_error("Unsupported regalloc type for AMDGPU", false);
+  }
+}
+
+/// Add the register assignment passes: SGPR allocation and SGPR spill
+/// lowering, then WWM register allocation, then per-thread VGPR allocation.
+/// Returns an error if Opt.RegAlloc is set, as it is not supported here.
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+    AddMachinePass &addPass) const {
+  if (Opt.RegAlloc != RegAllocType::Unset)
+    return make_error<StringError>(NPMRegAllocOptNotSupportedMessage,
+                                   inconvertibleErrorCode());
+
+  addPass(GCNPreRALongBranchRegPass());
+
+  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR);
+
+  // Commit allocated register changes. This is mostly necessary because too
+  // many things rely on the use lists of the physical registers, such as the
+  // verifier. This is only necessary with allocators which use LiveIntervals,
+  // since FastRegAlloc does the replacements itself.
+  // TODO: addPass(VirtRegRewriterPass(false));
+
+  // At this point, the sgpr-regalloc has been done and it is good to have the
+  // stack slot coloring to try to optimize the SGPR spill stack indices before
+  // attempting the custom SGPR spill lowering.
+  addPass(StackSlotColoringPass());
+
+  // Equivalent of PEI for SGPRs.
+  addPass(SILowerSGPRSpillsPass());
+
+  // Allocate WWM registers used in whole quad mode operations (for shaders).
+  addPass(SIPreAllocateWWMRegsPass());
+
+  // For allocating other wwm register operands.
+  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM);
+  addPass(SILowerWWMCopiesPass());
+  // TODO: addPass(VirtRegRewriterPass(false));
+  // TODO: addPass(AMDGPUReserveWWMRegsPass());
+
+  // For allocating per-thread VGPRs.
+  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR);
+
+  // TODO: addPreRewrite();
+  addPass(VirtRegRewriterPass(false));
+
+  // TODO: addPass(AMDGPUMarkLastScratchLoadPass());
+  return Error::success();
+}
+
 void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
   addPass(SIFixVGPRCopiesPass());
   if (TM.getOptLevel() > CodeGenOptLevel::None)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 3df4115324ac2..053d76e89ce08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -16,9 +16,11 @@
 
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
+#include "llvm/CodeGen/RegAllocCommon.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Passes/CodeGenPassBuilder.h"
+#include "llvm/Target/CGPassBuilderOption.h"
 #include <optional>
 #include <utility>
 
@@ -179,6 +181,7 @@ class AMDGPUCodeGenPassBuilder
   Error addInstSelector(AddMachinePass &) const;
   void addPreRewrite(AddMachinePass &) const;
   void addMachineSSAOptimization(AddMachinePass &) const;
+  Error addRegAssignmentOptimized(AddMachinePass &) const;
   void addPostRegAlloc(AddMachinePass &) const;
   void addPreEmitPass(AddMachinePass &) const;
 
@@ -189,6 +192,25 @@ class AMDGPUCodeGenPassBuilder
                      CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
   void addEarlyCSEOrGVNPass(AddIRPass &) const;
   void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
+
+private:
+  /// The phases in which AMDGPU register allocation runs.
+  enum class RegAllocPhase { SGPR, VGPR, WWM };
+
+  /// Returns the \p RegAllocPassT options to use for the given phase.
+  template <typename RegAllocPassT>
+  typename RegAllocPassT::Options getRAOptionsForPhase(RegAllocPhase) const;
+
+  /// Add register allocation pass to the pass manager.
+  /// This checks for the regalloc type given through
+  /// -{phase}-regalloc-npm={type} cl option. If the option is not specified, it
+  /// uses the preferred regalloc pass type.
+  ///
+  /// \tparam PreferredRegAllocPassT The fallback reg alloc pass type to use if
+  /// cl::opt is unspecified.
+  /// \param Phase The phase of register allocation to add.
+  template <typename PreferredRegAllocPassT>
+  void addRegAlloc(AddMachinePass &, RegAllocPhase Phase) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c..ca0420ffaea3f 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -2,11 +2,17 @@
 # RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
 # RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
 
+# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes %s | FileCheck %s --check-prefix=NPM-PASS
+
+
 # PASS: regallocfast<filter=sgpr>
 # PASS: regallocfast<filter=wwm>
 # PASS: regallocfast<filter=vgpr>
 # BAD-FILTER: invalid regallocfast register filter 'bad-filter'
 
+# NPM-PASS: greedy
+# NPM-PASS: regallocfast
+# NPM-PASS: regallocfast
 ---
 name: f
 ...