|
78 | 78 | #include "llvm/CodeGen/MachineLICM.h" |
79 | 79 | #include "llvm/CodeGen/MachineScheduler.h" |
80 | 80 | #include "llvm/CodeGen/Passes.h" |
| 81 | +#include "llvm/CodeGen/RegAllocFast.h" |
81 | 82 | #include "llvm/CodeGen/RegAllocRegistry.h" |
82 | 83 | #include "llvm/CodeGen/TargetPassConfig.h" |
83 | 84 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
@@ -189,6 +190,24 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false, |
189 | 190 | cl::init(&useDefaultRegisterAllocator), |
190 | 191 | cl::desc("Register allocator to use for WWM registers")); |
191 | 192 |
|
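| | +// New-pass-manager counterparts of the -sgpr-regalloc, -vgpr-regalloc and
| | +// -wwm-regalloc options above; each register class can select its allocator
| | +// independently.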
| 193 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM( |
| 194 | + "sgpr-regalloc-npm", cl::Hidden, |
| 195 | + cl::desc("Register allocator to use for SGPRs in new pass " |
| 196 | + "manager"), |
| 197 | + cl::init(RegAllocType::Default)); |
| 198 | + |
| 199 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM( |
| 200 | + "vgpr-regalloc-npm", cl::Hidden, |
| 201 | + cl::desc("Register allocator to use for VGPRs in new pass " |
| 202 | + "manager"), |
| 203 | + cl::init(RegAllocType::Default)); |
| 204 | + |
| 205 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM( |
| 206 | + "wwm-regalloc-npm", cl::Hidden, |
| 207 | + cl::desc("Register allocator to use for WWM registers in " |
| 208 | + "new pass manager"), |
| 209 | + cl::init(RegAllocType::Default)); |
| 210 | + |
192 | 211 | static void initializeDefaultSGPRRegisterAllocatorOnce() { |
193 | 212 | RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault(); |
194 | 213 |
|
@@ -2141,6 +2160,214 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization( |
2141 | 2160 | addPass(SIShrinkInstructionsPass()); |
2142 | 2161 | } |
2143 | 2162 |
|
| 2163 | +static const char NPMRegAllocOptNotSupportedMessage[] = |
| 2164 | + "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, " |
| 2165 | + "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
| 2167 | + |
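| | +// Build the allocator Options for a given phase. RA_OPTIONS dispatches on the
| | +// pass type because RegAllocFastPass::Options carries an extra ClearVirtRegs
| | +// field that the greedy allocator's Options does not.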
| 2259 | +template <typename RegAllocPassT> |
| 2260 | +typename RegAllocPassT::Options |
| 2261 | +AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const { |
| 2262 | +#define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \ |
| 2263 | + [&]() { \ |
| 2264 | + if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>) { \ |
| 2265 | + return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \ |
| 2266 | + } else { \ |
| 2267 | + return typename RegAllocPassT::Options{FilterFunc, Name}; \ |
| 2268 | + } \ |
| 2269 | + }() |
| 2270 | + |
| 2271 | + switch (Phase) { |
| 2272 | + case RegAllocPhase::SGPR: |
| 2273 | + return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false); |
| 2274 | + case RegAllocPhase::WWM: |
| 2275 | + return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false); |
| 2276 | + case RegAllocPhase::VGPR: |
| 2277 | + return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true); |
| 2278 | + } |
| 2279 | + llvm_unreachable("Unexpected register allocation phase");
| 2291 | + |
| 2292 | +#undef RA_OPTIONS |
| 2293 | +} |
| 2294 | + |
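| | +// Add the allocator pass for the given phase: honor the per-phase
| | +// -sgpr/-vgpr/-wwm-regalloc-npm override if one was given, otherwise fall
| | +// back to RegAllocPassT, the allocator requested by the pipeline.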
| 2295 | +template <typename RegAllocPassT> |
| 2296 | +void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass, |
| 2297 | + RegAllocPhase Phase) const { |
| 2298 | + RegAllocType RAType; |
| 2299 | + // Read the appropriate phase's regalloc type. |
| 2300 | + switch (Phase) { |
| 2301 | + case RegAllocPhase::SGPR: |
| 2302 | + RAType = SGPRRegAllocTypeNPM; |
| 2303 | + break; |
| 2304 | + case RegAllocPhase::WWM: |
| 2305 | + RAType = WWMRegAllocTypeNPM; |
| 2306 | + break; |
| 2307 | + case RegAllocPhase::VGPR: |
| 2308 | + RAType = VGPRRegAllocTypeNPM; |
| 2309 | + break; |
| 2310 | + } |
| 2311 | + |
| 2312 | + // Construct the pass with the appropriate options. |
| 2313 | + switch (RAType) { |
| 2314 | + case RegAllocType::Greedy: |
| 2315 | + addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase))); |
| 2316 | + return; |
| 2317 | + case RegAllocType::Fast: |
| 2318 | + addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase))); |
| 2319 | + return; |
| 2320 | + case RegAllocType::Unset:
| | + case RegAllocType::Default:
| | + // No explicit override for this phase; use the pipeline's allocator.
| 2321 | + addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase)));
| 2322 | + return;
| 2323 | + default: |
| 2324 | + report_fatal_error("Unsupported regalloc type", false); |
| 2325 | + } |
| 2326 | +} |
| 2327 | + |
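| | +// NPM counterpart of GCNPassConfig::addRegAssignmentOptimized: SGPRs, WWM
| | +// registers and per-thread VGPRs are allocated in separate phases, with the
| | +// SGPR spill lowering run in between.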
| 2328 | +Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( |
| 2329 | + AddMachinePass &addPass) const { |
| 2330 | + if (Opt.RegAlloc != RegAllocType::Unset) |
| 2331 | + return make_error<StringError>(NPMRegAllocOptNotSupportedMessage, |
| 2332 | + inconvertibleErrorCode()); |
| 2333 | + |
| 2334 | + addPass(GCNPreRALongBranchRegPass()); |
| 2335 | + |
| 2336 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR); |
| 2337 | + |
| 2338 | + // Commit allocated register changes. This is mostly necessary because too |
| 2339 | + // many things rely on the use lists of the physical registers, such as the |
| 2340 | + // verifier. This is only necessary with allocators which use LiveIntervals, |
| 2341 | + // since FastRegAlloc does the replacements itself. |
| 2342 | + // TODO: addPass(VirtRegRewriterPass(false)); |
| 2343 | + |
| 2344 | + // At this point, the sgpr-regalloc has been done and it is good to have the |
| 2345 | + // stack slot coloring to try to optimize the SGPR spill stack indices before |
| 2346 | + // attempting the custom SGPR spill lowering. |
| 2347 | + addPass(StackSlotColoringPass()); |
| 2348 | + |
| 2349 | + // Equivalent of PEI for SGPRs. |
| 2350 | + addPass(SILowerSGPRSpillsPass()); |
| 2351 | + |
| 2352 | + // To allocate wwm registers used in whole quad mode operations (for shaders).
| 2353 | + addPass(SIPreAllocateWWMRegsPass()); |
| 2354 | + |
| 2355 | + // For allocating other wwm register operands. |
| 2356 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM); |
| 2357 | + addPass(SILowerWWMCopiesPass()); |
| 2358 | + // TODO: addPass(VirtRegRewriterPass(false)); |
| 2359 | + // TODO: addPass(AMDGPUReserveWWMRegsPass()); |
| 2360 | + |
| 2361 | + // For allocating per-thread VGPRs. |
| 2362 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR); |
| 2363 | + |
| 2364 | + // TODO: addPreRewrite(); |
| 2365 | + addPass(VirtRegRewriterPass(false)); |
| 2366 | + |
| 2367 | + // TODO: addPass(AMDGPUMarkLastScratchLoadPass()); |
| 2368 | + return Error::success(); |
| 2369 | +} |
| 2370 | + |
2144 | 2371 | void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const { |
2145 | 2372 | addPass(SIFixVGPRCopiesPass()); |
2146 | 2373 | if (TM.getOptLevel() > CodeGenOptLevel::None) |
|