@@ -81,6 +81,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
8181 : RegisterRegAllocBase(N, D, C) {}
8282};
8383
84+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
85+ public:
86+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
87+ : RegisterRegAllocBase(N, D, C) {}
88+ };
89+
8490static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
8591 const MachineRegisterInfo &MRI,
8692 const Register Reg) {
@@ -95,13 +101,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
95101 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
96102}
97103
98- // / -{sgpr|vgpr}-regalloc=... command line option.
104+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
105+ const MachineRegisterInfo &MRI,
106+ const Register Reg) {
107+ const SIMachineFunctionInfo *MFI =
108+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
109+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
110+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
111+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
112+ }
113+
114+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
99115static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
100116
101117// / A dummy default pass factory indicates whether the register allocator is
102118// / overridden on the command line.
103119static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
104120static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
121+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
105122
106123static SGPRRegisterRegAlloc
107124defaultSGPRRegAlloc (" default" ,
@@ -118,6 +135,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
118135VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
119136 cl::desc (" Register allocator to use for VGPRs" ));
120137
138+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
139+ RegisterPassParser<WWMRegisterRegAlloc>>
140+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
141+ cl::init (&useDefaultRegisterAllocator),
142+ cl::desc(" Register allocator to use for WWM registers" ));
121143
122144static void initializeDefaultSGPRRegisterAllocatorOnce () {
123145 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -137,6 +159,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
137159 }
138160}
139161
162+ static void initializeDefaultWWMRegisterAllocatorOnce () {
163+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
164+
165+ if (!Ctor) {
166+ Ctor = WWMRegAlloc;
167+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
168+ }
169+ }
170+
140171static FunctionPass *createBasicSGPRRegisterAllocator () {
141172 return createBasicRegisterAllocator (onlyAllocateSGPRs);
142173}
@@ -161,6 +192,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
161192 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
162193}
163194
195+ static FunctionPass *createBasicWWMRegisterAllocator () {
196+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
197+ }
198+
199+ static FunctionPass *createGreedyWWMRegisterAllocator () {
200+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
201+ }
202+
203+ static FunctionPass *createFastWWMRegisterAllocator () {
204+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
205+ }
206+
164207static SGPRRegisterRegAlloc basicRegAllocSGPR (
165208 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
166209static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -177,7 +220,16 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
177220
178221static VGPRRegisterRegAlloc fastRegAllocVGPR (
179222 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
180- }
223+
224+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
225+ " basic register allocator" ,
226+ createBasicWWMRegisterAllocator);
227+ static WWMRegisterRegAlloc
228+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
229+ createGreedyWWMRegisterAllocator);
230+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
231+ createFastWWMRegisterAllocator);
232+ } // anonymous namespace
181233
182234static cl::opt<bool >
183235EnableEarlyIfConversion (" amdgpu-early-ifcvt" , cl::Hidden,
@@ -425,6 +477,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
425477 initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
426478 initializeAMDGPUSwLowerLDSLegacyPass (*PR);
427479 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
480+ initializeAMDGPUReserveWWMRegsPass (*PR);
428481 initializeAMDGPURewriteOutArgumentsPass (*PR);
429482 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
430483 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -1014,6 +1067,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
10141067
10151068 FunctionPass *createSGPRAllocPass (bool Optimized);
10161069 FunctionPass *createVGPRAllocPass (bool Optimized);
1070+ FunctionPass *createWWMRegAllocPass (bool Optimized);
10171071 FunctionPass *createRegAllocPass (bool Optimized) override ;
10181072
10191073 bool addRegAssignAndRewriteFast () override ;
@@ -1410,7 +1464,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
14101464}
14111465
14121466bool GCNPassConfig::addPreRewrite () {
1413- addPass (&SILowerWWMCopiesID);
14141467 if (EnableRegReassign)
14151468 addPass (&GCNNSAReassignID);
14161469 return true ;
@@ -1446,12 +1499,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14461499 return createFastVGPRRegisterAllocator ();
14471500}
14481501
1502+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1503+ // Initialize the global default.
1504+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1505+ initializeDefaultWWMRegisterAllocatorOnce);
1506+
1507+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1508+ if (Ctor != useDefaultRegisterAllocator)
1509+ return Ctor ();
1510+
1511+ if (Optimized)
1512+ return createGreedyWWMRegisterAllocator ();
1513+
1514+ return createFastWWMRegisterAllocator ();
1515+ }
1516+
14491517FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14501518 llvm_unreachable (" should not be used" );
14511519}
14521520
/// Diagnostic text for when the generic -regalloc option is used with amdgcn.
/// It names the three per-register-class options that replace it.
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
    "and -vgpr-regalloc";
14551524
14561525bool GCNPassConfig::addRegAssignAndRewriteFast () {
14571526 if (!usingDefaultRegAlloc ())
@@ -1463,11 +1532,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14631532
14641533 // Equivalent of PEI for SGPRs.
14651534 addPass (&SILowerSGPRSpillsID);
1535+
  // Allocate WWM registers used in whole quad mode operations (for shaders).
14661537 addPass (&SIPreAllocateWWMRegsID);
14671538
1468- addPass (createVGPRAllocPass (false ));
1539+ // For allocating other wwm register operands.
1540+ addPass (createWWMRegAllocPass (false ));
14691541
14701542 addPass (&SILowerWWMCopiesID);
1543+ addPass (&AMDGPUReserveWWMRegsID);
1544+
1545+ // For allocating regular VGPRs.
1546+ addPass (createVGPRAllocPass (false ));
1547+
14711548 return true ;
14721549}
14731550
@@ -1487,8 +1564,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14871564
14881565 // Equivalent of PEI for SGPRs.
14891566 addPass (&SILowerSGPRSpillsID);
1567+
  // Allocate WWM registers used in whole quad mode operations (for shaders).
14901569 addPass (&SIPreAllocateWWMRegsID);
14911570
1571+ // For allocating other whole wave mode registers.
1572+ addPass (createWWMRegAllocPass (true ));
1573+ addPass (&SILowerWWMCopiesID);
1574+ addPass (createVirtRegRewriter (false ));
1575+ addPass (&AMDGPUReserveWWMRegsID);
1576+
1577+ // For allocating regular VGPRs.
14921578 addPass (createVGPRAllocPass (true ));
14931579
14941580 addPreRewrite ();
0 commit comments