@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108108 : RegisterRegAllocBase(N, D, C) {}
109109};
110110
/// Registry entry type for the allocators that can be selected through the
/// -wwm-regalloc option. Each instance supplies a name (N), a description (D)
/// and a pass constructor (C); all three are forwarded unchanged to
/// RegisterRegAllocBase.
111+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
112+ public:
113+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
114+ : RegisterRegAllocBase(N, D, C) {}
115+ };
116+
111117static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
112118 const MachineRegisterInfo &MRI,
113119 const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122128 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
123129}
124130
125- // / -{sgpr|vgpr}-regalloc=... command line option.
/// Register filter handed to the basic/greedy/fast WWM allocator factories.
/// Returns true for \p Reg only when both conditions hold:
///   - its register class is not an SGPR class, and
///   - the function's SIMachineFunctionInfo reports the WWM_REG flag for it.
131+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
132+ const MachineRegisterInfo &MRI,
133+ const Register Reg) {
134+ const SIMachineFunctionInfo *MFI =
135+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
136+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
// The SGPR-class test comes first, so the flag lookup is skipped for SGPRs.
137+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
138+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
139+ }
140+
141+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
/// Placeholder pass constructor that always returns nullptr. It is the
/// cl::init value of the -vgpr-regalloc and -wwm-regalloc options, and
/// createWWMRegAllocPass compares the registered default against its address
/// to decide whether to fall back to the built-in greedy/fast choice.
126142static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
127143
128144// / A dummy default pass factory indicates whether the register allocator is
129145// / overridden on the command line.
// One llvm::once_flag per register category. The WWM flag is passed to
// llvm::call_once in createWWMRegAllocPass so that the default WWM allocator
// is installed a single time; the SGPR and VGPR flags are presumably used the
// same way by their own create*AllocPass functions (not fully visible here).
130146static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
131147static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
148+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
132149
133150static SGPRRegisterRegAlloc
134151defaultSGPRRegAlloc (" default" ,
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145162VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146163 cl::desc (" Register allocator to use for VGPRs" ));
147164
/// Hidden command line option selecting the allocator used for WWM registers.
/// Accepted values are the allocators registered as WWMRegisterRegAlloc.
/// The initial value is useDefaultRegisterAllocator, which
/// createWWMRegAllocPass treats as "nothing selected explicitly".
165+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
166+ RegisterPassParser<WWMRegisterRegAlloc>>
167+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
168+ cl::init (&useDefaultRegisterAllocator),
169+ cl::desc(" Register allocator to use for WWM registers" ));
148170
149171static void initializeDefaultSGPRRegisterAllocatorOnce () {
150172 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164186 }
165187}
166188
/// Installs the default WWM allocator constructor if none is registered yet.
/// Invoked through llvm::call_once from createWWMRegAllocPass.
/// When WWMRegisterRegAlloc::getDefault() is null, the current value of the
/// WWMRegAlloc option becomes the default.
189+ static void initializeDefaultWWMRegisterAllocatorOnce () {
190+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
191+
192+ if (!Ctor) {
// NOTE(review): the value stored into the local Ctor below is never read
// again in this function; only the setDefault call has an effect.
193+ Ctor = WWMRegAlloc;
194+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
195+ }
196+ }
197+
/// Creates a basic register allocator pass that uses onlyAllocateSGPRs as its
/// register filter.
167198static FunctionPass *createBasicSGPRRegisterAllocator () {
168199 return createBasicRegisterAllocator (onlyAllocateSGPRs);
169200}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188219 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
189220}
190221
/// Creates a basic register allocator pass that uses onlyAllocateWWMRegs as
/// its register filter. Registered under the name "basic" for -wwm-regalloc.
222+ static FunctionPass *createBasicWWMRegisterAllocator () {
223+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
224+ }
225+
/// Creates a greedy register allocator pass that uses onlyAllocateWWMRegs as
/// its register filter. This is what createWWMRegAllocPass falls back to when
/// Optimized is true and no allocator was selected explicitly.
226+ static FunctionPass *createGreedyWWMRegisterAllocator () {
227+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
228+ }
229+
/// Creates a fast register allocator pass that uses onlyAllocateWWMRegs as
/// its register filter. This is what createWWMRegAllocPass falls back to when
/// Optimized is false and no allocator was selected explicitly.
230+ static FunctionPass *createFastWWMRegisterAllocator () {
// NOTE(review): the second argument is false here, while the VGPR variant
// passes true. Presumably it controls whether virtual registers are cleared
// after this run; confirm against createFastRegisterAllocator's declaration.
231+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
232+ }
233+
191234static SGPRRegisterRegAlloc basicRegAllocSGPR (
192235 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
193236static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -204,6 +247,15 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204247
/// Registers createFastVGPRRegisterAllocator as a VGPRRegisterRegAlloc entry
/// with the name and description given below.
205248static VGPRRegisterRegAlloc fastRegAllocVGPR (
206249 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
250+
/// WWM allocator registrations. Each static object pairs a name and a
/// description with the factory that builds the corresponding allocator
/// restricted by onlyAllocateWWMRegs: basic, greedy and fast.
251+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
252+ " basic register allocator" ,
253+ createBasicWWMRegisterAllocator);
254+ static WWMRegisterRegAlloc
255+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
256+ createGreedyWWMRegisterAllocator);
257+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
258+ createFastWWMRegisterAllocator);
207259} // anonymous namespace
208260
209261static cl::opt<bool >
@@ -440,6 +492,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440492 initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
441493 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
442494 initializeAMDGPULowerBufferFatPointersPass (*PR);
495+ initializeAMDGPUReserveWWMRegsPass (*PR);
443496 initializeAMDGPURewriteOutArgumentsPass (*PR);
444497 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
445498 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -1021,6 +1074,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
10211074
10221075 FunctionPass *createSGPRAllocPass (bool Optimized);
10231076 FunctionPass *createVGPRAllocPass (bool Optimized);
1077+ FunctionPass *createWWMRegAllocPass (bool Optimized);
10241078 FunctionPass *createRegAllocPass (bool Optimized) override ;
10251079
10261080 bool addRegAssignAndRewriteFast () override ;
@@ -1417,7 +1471,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
14171471}
14181472
14191473bool GCNPassConfig::addPreRewrite () {
1420- addPass (&SILowerWWMCopiesID);
14211474 if (EnableRegReassign)
14221475 addPass (&GCNNSAReassignID);
14231476 return true ;
@@ -1453,12 +1506,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14531506 return createFastVGPRRegisterAllocator ();
14541507}
14551508
/// Builds the register allocator pass used for WWM registers.
///
/// \param Optimized chooses the fallback when no allocator was selected
///        explicitly: greedy when true, fast when false.
/// \returns the pass produced by the registered default constructor if that
///          constructor is not the useDefaultRegisterAllocator placeholder;
///          otherwise the greedy or fast WWM allocator.
1509+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1510+ // Initialize the global default.
1511+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1512+ initializeDefaultWWMRegisterAllocatorOnce);
1513+
// Anything other than the placeholder means a specific allocator was chosen;
// call that constructor and return its result directly.
1514+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1515+ if (Ctor != useDefaultRegisterAllocator)
1516+ return Ctor ();
1517+
// No explicit choice: pick the built-in allocator from Optimized.
1518+ if (Optimized)
1519+ return createGreedyWWMRegisterAllocator ();
1520+
1521+ return createFastWWMRegisterAllocator ();
1522+ }
1523+
/// Override that must never be reached: the body is a bare llvm_unreachable.
/// The visible GCN register assignment sequences call createSGPRAllocPass,
/// createWWMRegAllocPass and createVGPRAllocPass instead of this hook.
14561524FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14571525 llvm_unreachable (" should not be used" );
14581526}
14591527
// Diagnostic text stating that the generic -regalloc option is not supported
// on amdgcn and naming the per-category options to use instead.
14601528static const char RegAllocOptNotSupportedMessage[] =
1461- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1529+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1530+ " and -vgpr-regalloc" ;
14621531
14631532bool GCNPassConfig::addRegAssignAndRewriteFast () {
14641533 if (!usingDefaultRegAlloc ())
@@ -1470,11 +1539,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14701539
14711540 // Equivalent of PEI for SGPRs.
14721541 addPass (&SILowerSGPRSpillsLegacyID);
1542+
1543+ // To allocate wwm registers used in whole quad mode operations (for shaders).
14731544 addPass (&SIPreAllocateWWMRegsID);
14741545
1475- addPass (createVGPRAllocPass (false ));
1546+ // For allocating other wwm register operands.
1547+ addPass (createWWMRegAllocPass (false ));
14761548
14771549 addPass (&SILowerWWMCopiesID);
1550+ addPass (&AMDGPUReserveWWMRegsID);
1551+
1552+ // For allocating regular VGPRs.
1553+ addPass (createVGPRAllocPass (false ));
1554+
14781555 return true ;
14791556}
14801557
@@ -1494,8 +1571,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14941571
14951572 // Equivalent of PEI for SGPRs.
14961573 addPass (&SILowerSGPRSpillsLegacyID);
1574+
1575+ // To allocate wwm registers used in whole quad mode operations (for shaders).
14971576 addPass (&SIPreAllocateWWMRegsID);
14981577
1578+ // For allocating other whole wave mode registers.
1579+ addPass (createWWMRegAllocPass (true ));
1580+ addPass (&SILowerWWMCopiesID);
1581+ addPass (createVirtRegRewriter (false ));
1582+ addPass (&AMDGPUReserveWWMRegsID);
1583+
1584+ // For allocating regular VGPRs.
14991585 addPass (createVGPRAllocPass (true ));
15001586
15011587 addPreRewrite ();
0 commit comments