Skip to content

Commit 4f727bc

Browse files
committed
[AMDGPU][NPM] Support -{phase}-regalloc-npm options
1 parent 1fe702f commit 4f727bc

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include "llvm/CodeGen/MachineLICM.h"
7979
#include "llvm/CodeGen/MachineScheduler.h"
8080
#include "llvm/CodeGen/Passes.h"
81+
#include "llvm/CodeGen/RegAllocFast.h"
8182
#include "llvm/CodeGen/RegAllocRegistry.h"
8283
#include "llvm/CodeGen/TargetPassConfig.h"
8384
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -189,6 +190,24 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
189190
cl::init(&useDefaultRegisterAllocator),
190191
cl::desc("Register allocator to use for WWM registers"));
191192

193+
// Selects the register allocator for the SGPR allocation phase when the new
// pass manager is used (-sgpr-regalloc-npm). Starts out as
// RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM(
    "sgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
    cl::desc("Register allocator to use for SGPRs in new pass manager"));
198+
199+
// Selects the register allocator for the VGPR allocation phase when the new
// pass manager is used (-vgpr-regalloc-npm). Starts out as
// RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM(
    "vgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
    cl::desc("Register allocator to use for VGPRs in new pass manager"));
204+
205+
// Selects the register allocator for the WWM register allocation phase when
// the new pass manager is used (-wwm-regalloc-npm). Starts out as
// RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM(
    "wwm-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
    cl::desc("Register allocator to use for WWM "
             "registers in new pass manager"));
210+
192211
static void initializeDefaultSGPRRegisterAllocatorOnce() {
193212
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
194213

@@ -2141,6 +2160,214 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
21412160
addPass(SIShrinkInstructionsPass());
21422161
}
21432162

2163+
// Error text returned by addRegAssignmentOptimized() when Opt.RegAlloc has
// been set; it directs users to the per-phase options instead.
static const char NPMRegAllocOptNotSupportedMessage[] =
    "-regalloc-npm not supported with amdgcn. "
    "Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm";
2167+
2168+
// void AMDGPUCodeGenPassBuilder::addSGPRRegAlloc(AddMachinePass &addPass,
2169+
// RegAllocType RAType, RegAllocFilterFunc FilterFunc, bool Optimized) const {
2170+
// RegAllocType RAType = RegAllocTypeNPM;
2171+
// if (RAType == RegAllocType::Default) {
2172+
// RAType = Optimized ? RegAllocType::Greedy : RegAllocType::Fast;
2173+
// }
2174+
2175+
// if (RAType == RegAllocType::Greedy) {
2176+
// addPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}));
2177+
// return;
2178+
// }
2179+
2180+
// if (RAType == RegAllocType::Fast) {
2181+
// addPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}));
2182+
// return;
2183+
// }
2184+
// report_fatal_error("Unsupported SGPR regalloc type", false);
2185+
// }
2186+
2187+
// template<typename RegAllocPass>
2188+
// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass,
2189+
// RegAllocPass::Options Options) {
2190+
// addPass(RegAllocPass(Options));
2191+
// }
2192+
2193+
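// NOTE: superseded draft, kept commented out for reference only. The live
// implementation is getRAOptionsForPhase() and addRegAlloc() further below.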
2194+
// template<typename RegAllocPass>
2195+
// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass,
2196+
// RegAllocPhase Phase) {
2197+
// #define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \
2198+
// [&]() { \
2199+
// if constexpr (std::is_same_v<RegAllocPass, RegAllocFastPass>) { \
2200+
// return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \
2201+
// } else { \
2202+
// return typename RegAllocPass::Options{FilterFunc, Name}; \
2203+
// } \
2204+
// }()
2205+
2206+
// typename RegAllocPass::Options Options;
2207+
// RegAllocType RAType;
2208+
2209+
// switch (Phase) {
2210+
// case RegAllocPhase::SGPR:
2211+
// Options = RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2212+
// RAType = SGPRRegAllocTypeNPM;
2213+
// break;
2214+
// case RegAllocPhase::WWM:
2215+
// Options = RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2216+
// RAType = WWMRegAllocTypeNPM;
2217+
// break;
2218+
// case RegAllocPhase::VGPR:
2219+
// Options = RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2220+
// RAType = VGPRRegAllocTypeNPM;
2221+
// break;
2222+
// };
2223+
2224+
// switch(RAType) {
2225+
// case RegAllocType::Greedy:
2226+
// addPass(RAGreedyPass(Options));
2227+
// return;
2228+
// case RegAllocType::Fast:
2229+
// addPass(RegAllocFastPass(Options));
2230+
// return;
2231+
// case RegAllocType::Unset:
2232+
// addPass(RegAllocPass(Options));
2233+
// }
2234+
// #undef RA_OPTIONS
2235+
// }
2236+
2237+
// template<typename RegAllocPass>
2238+
// void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
2239+
// RegAllocPhase Phase) {
2240+
// RegAllocType RAType;
2241+
// switch(Phase) {
2242+
// case RegAllocPhase::SGPR:
2243+
// RAType = SGPRRegAllocTypeNPM;
2244+
// break;
2245+
// case RegAllocPhase::WWM:
2246+
// RAType = WWMRegAllocTypeNPM;
2247+
// break;
2248+
// case RegAllocPhase::VGPR:
2249+
// RAType = VGPRRegAllocTypeNPM;
2250+
// break;
2251+
// }
2252+
// switch (RAType) {
2253+
// case RegAllocType::Greedy:
2254+
// addRegAllocOfType(addPass, Phase);
2255+
// }
2256+
// addRegAllocOfType<RegAllocPass>(addPass, Phase);
2257+
// }
2258+
2259+
template <typename RegAllocPassT>
2260+
typename RegAllocPassT::Options
2261+
AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const {
2262+
#define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \
2263+
[&]() { \
2264+
if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>) { \
2265+
return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \
2266+
} else { \
2267+
return typename RegAllocPassT::Options{FilterFunc, Name}; \
2268+
} \
2269+
}()
2270+
2271+
switch (Phase) {
2272+
case RegAllocPhase::SGPR:
2273+
return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2274+
case RegAllocPhase::WWM:
2275+
return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2276+
case RegAllocPhase::VGPR:
2277+
return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2278+
}
2279+
// static_assert(std::is_same_v<PhaseT, SGPRPhase> ||
2280+
// std::is_same_v<PhaseT, WWMPhase> ||
2281+
// std::is_same_v<PhaseT, VGPRPhase>,
2282+
// "Unsupported phase type");
2283+
2284+
// if constexpr(std::is_same_v<PhaseT, SGPRPhase>) {
2285+
// return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2286+
// } else if constexpr(std::is_same_v<PhaseT, WWMPhase>) {
2287+
// return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2288+
// } else if constexpr(std::is_same_v<PhaseT, VGPRPhase>) {
2289+
// return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2290+
// }
2291+
2292+
#undef RA_OPTIONS
2293+
}
2294+
2295+
template <typename RegAllocPassT>
2296+
void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
2297+
RegAllocPhase Phase) const {
2298+
RegAllocType RAType;
2299+
// Read the appropriate phase's regalloc type.
2300+
switch (Phase) {
2301+
case RegAllocPhase::SGPR:
2302+
RAType = SGPRRegAllocTypeNPM;
2303+
break;
2304+
case RegAllocPhase::WWM:
2305+
RAType = WWMRegAllocTypeNPM;
2306+
break;
2307+
case RegAllocPhase::VGPR:
2308+
RAType = VGPRRegAllocTypeNPM;
2309+
break;
2310+
}
2311+
2312+
// Construct the pass with the appropriate options.
2313+
switch (RAType) {
2314+
case RegAllocType::Greedy:
2315+
addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase)));
2316+
return;
2317+
case RegAllocType::Fast:
2318+
addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase)));
2319+
return;
2320+
case RegAllocType::Unset:
2321+
addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase)));
2322+
return;
2323+
default:
2324+
report_fatal_error("Unsupported regalloc type", false);
2325+
}
2326+
}
2327+
2328+
/// Add the optimized register assignment passes. Allocation is added in three
/// phases (SGPR, then WWM, then VGPR), each through addRegAlloc() with
/// RAGreedyPass as the preferred allocator.
///
/// \param addPass Callable used to append passes to the pipeline.
/// \returns A StringError carrying NPMRegAllocOptNotSupportedMessage when
/// Opt.RegAlloc is anything other than RegAllocType::Unset; success otherwise.
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
    AddMachinePass &addPass) const {
  // Opt.RegAlloc is rejected here; the error text points at the per-phase
  // options instead.
  const bool HasGenericRegAllocChoice = Opt.RegAlloc != RegAllocType::Unset;
  if (HasGenericRegAllocChoice) {
    return make_error<StringError>(NPMRegAllocOptNotSupportedMessage,
                                   inconvertibleErrorCode());
  }

  addPass(GCNPreRALongBranchRegPass());

  // Phase 1: allocate SGPRs.
  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR);

  // The allocated register changes should be committed here, mostly because
  // many things (such as the verifier) rely on the use lists of the physical
  // registers. That is only required for allocators which use LiveIntervals;
  // FastRegAlloc performs the replacements itself.
  // TODO: addPass(VirtRegRewriterPass(false));

  // With SGPR allocation finished, run stack slot coloring to try to optimize
  // the SGPR spill stack indices before the custom SGPR spill lowering.
  addPass(StackSlotColoringPass());

  // Equivalent of PEI for SGPRs.
  addPass(SILowerSGPRSpillsPass());

  // Allocate the wwm registers used in whole quad mode operations (for
  // shaders).
  addPass(SIPreAllocateWWMRegsPass());

  // Phase 2: allocate the other wwm register operands.
  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM);
  addPass(SILowerWWMCopiesPass());
  // TODO: addPass(VirtRegRewriterPass(false));
  // TODO: addPass(AMDGPUReserveWWMRegsPass());

  // Phase 3: allocate per-thread VGPRs.
  addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR);

  // TODO: addPreRewrite();
  addPass(VirtRegRewriterPass(false));

  // TODO: addPass(AMDGPUMarkLastScratchLoadPass());
  return Error::success();
}
2370+
21442371
void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
21452372
addPass(SIFixVGPRCopiesPass());
21462373
if (TM.getOptLevel() > CodeGenOptLevel::None)

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616

1717
#include "GCNSubtarget.h"
1818
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
19+
#include "llvm/CodeGen/RegAllocCommon.h"
1920
#include "llvm/CodeGen/TargetPassConfig.h"
2021
#include "llvm/MC/MCStreamer.h"
2122
#include "llvm/Passes/CodeGenPassBuilder.h"
23+
#include "llvm/Target/CGPassBuilderOption.h"
2224
#include <optional>
2325
#include <utility>
2426

@@ -179,6 +181,7 @@ class AMDGPUCodeGenPassBuilder
179181
Error addInstSelector(AddMachinePass &) const;
180182
void addPreRewrite(AddMachinePass &) const;
181183
void addMachineSSAOptimization(AddMachinePass &) const;
184+
Error addRegAssignmentOptimized(AddMachinePass &) const;
182185
void addPostRegAlloc(AddMachinePass &) const;
183186
void addPreEmitPass(AddMachinePass &) const;
184187

@@ -189,6 +192,38 @@ class AMDGPUCodeGenPassBuilder
189192
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
190193
void addEarlyCSEOrGVNPass(AddIRPass &) const;
191194
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
195+
196+
private:
197+
// /// Dummy structs to represent different phases of register allocation.
198+
// struct SGPRPhase{};
199+
// struct VGPRPhase{};
200+
// struct WWMPhase{};
201+
/// The register allocation phases added by this pass builder. Each phase has
/// its own filter function and its own -{phase}-regalloc-npm option.
enum class RegAllocPhase { SGPR, VGPR, WWM };
202+
203+
/// Build the options of \p RegAllocPassT for the given allocation phase:
/// the phase's filter function and name ("sgpr", "wwm" or "vgpr"). For
/// RegAllocFastPass the options also receive a clear-virt-regs value, which
/// is true only for the VGPR phase.
template <typename RegAllocPassT>
204+
typename RegAllocPassT::Options getRAOptionsForPhase(RegAllocPhase) const;
205+
206+
/// \brief Add register allocation pass to the pass manager.
207+
/// This checks for the regalloc type given through the
208+
/// -{phase}-regalloc-npm={type} cl option. Greedy and fast select that
209+
/// allocator explicitly; if no allocator is selected, the preferred regalloc
210+
/// pass type is used. Unsupported types are reported as a fatal error.
211+
/// \tparam PreferredRegAllocPassT The fallback reg alloc pass type to use if
212+
/// cl::opt is unspecified.
213+
/// \param Phase The phase of register allocation to add.
214+
template <typename PreferredRegAllocPassT>
215+
void addRegAlloc(AddMachinePass &, RegAllocPhase Phase) const;
216+
217+
// NOTE(review): the comment and declaration below describe a generic
218+
// addRegAllocPass() helper that is currently commented out:
219+
//   Add the register allocation pass for given filter func and type
220+
//   (greedy/fast). Type: if RegAllocType::Default, add according to the
221+
//   optimization level.
222+
// void addRegAllocPass(AddMachinePass &, RegAllocType Type,
223+
// RegAllocFilterFunc FilterFunc) const;
224+
// Per-phase hooks. NOTE(review): no live definition of these is visible in
// this change (the only addSGPRRegAlloc body in AMDGPUTargetMachine.cpp is
// commented out). Optimized presumably selects greedy over fast when no
// allocator is requested -- TODO confirm or remove.
void addSGPRRegAlloc(AddMachinePass &, bool Optimized) const;
225+
void addWWMRegAlloc(AddMachinePass &, bool Optimized) const;
226+
void addVGPRRegAlloc(AddMachinePass &, bool Optimized) const;
192227
};
193228

194229
} // end namespace llvm

0 commit comments

Comments
 (0)