@@ -84,6 +84,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
8484 : RegisterRegAllocBase(N, D, C) {}
8585};
8686
// Registry type for register allocators that can be selected for WWM
// registers. It derives from RegisterRegAllocBase parameterized on itself and
// does nothing beyond forwarding its three constructor arguments to that base.
// The registrations in this file pass a short name, a description string and
// a pass factory function as N, D and C respectively.
87+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
88+ public:
89+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
90+ : RegisterRegAllocBase(N, D, C) {}
91+ };
92+
8793static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
8894 const MachineRegisterInfo &MRI,
8995 const Register Reg) {
@@ -98,13 +104,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
98104 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
99105}
100106
101- // / -{sgpr|vgpr}-regalloc=... command line option.
// Register filter handed to the WWM allocator factories in this file.
// Returns true only when both conditions hold for Reg:
//   1. its register class (looked up through MRI) is not an SGPR class, and
//   2. the function's SIMachineFunctionInfo reports the WWM_REG flag for it.
// TRI is cast to SIRegisterInfo with static_cast, i.e. without a runtime
// check, so callers must pass the AMDGPU register info.
107+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
108+ const MachineRegisterInfo &MRI,
109+ const Register Reg) {
110+ const SIMachineFunctionInfo *MFI =
111+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
112+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
// The SGPR-class test comes first; the flag lookup is skipped for SGPRs
// because of && short-circuiting.
113+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
114+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
115+ }
116+
117+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
// Always returns nullptr. The function's address is what matters: it is the
// cl::init value of the wwm-regalloc option and is compared against the
// selected constructor in GCNPassConfig::createWWMRegAllocPass, so it serves
// as a "no explicit choice" marker rather than as a real pass factory.
102118static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
103119
104120// / A dummy default pass factory indicates whether the register allocator is
105121// / overridden on the command line.
// One once_flag per register-class allocator default. The WWM flag is passed
// to llvm::call_once in GCNPassConfig::createWWMRegAllocPass together with
// initializeDefaultWWMRegisterAllocatorOnce. The SGPR and VGPR flags are
// presumably consumed the same way by their own create*AllocPass functions
// (not visible in this hunk -- confirm).
106122static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
107123static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
124+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
108125
109126static SGPRRegisterRegAlloc
110127defaultSGPRRegAlloc (" default" ,
@@ -121,6 +138,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
121138VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
122139 cl::desc (" Register allocator to use for VGPRs" ));
123140
// Hidden command-line option "wwm-regalloc". Its value is a
// WWMRegisterRegAlloc::FunctionPassCtor parsed by
// RegisterPassParser<WWMRegisterRegAlloc>. When the option is not given, the
// value is the address of useDefaultRegisterAllocator (set via cl::init).
141+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
142+ RegisterPassParser<WWMRegisterRegAlloc>>
143+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
144+ cl::init (&useDefaultRegisterAllocator),
145+ cl::desc(" Register allocator to use for WWM registers" ));
124146
125147static void initializeDefaultSGPRRegisterAllocatorOnce () {
126148 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -140,6 +162,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
140162 }
141163}
142164
// Seeds the WWMRegisterRegAlloc default constructor from the wwm-regalloc
// option. If WWMRegisterRegAlloc::getDefault() already returns a non-null
// constructor, nothing is changed; otherwise the current value of the
// WWMRegAlloc option is installed with setDefault().
// Invoked through llvm::call_once from GCNPassConfig::createWWMRegAllocPass.
165+ static void initializeDefaultWWMRegisterAllocatorOnce () {
166+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
167+
168+ if (!Ctor) {
// NOTE(review): the local Ctor is assigned here but never read again in this
// function; only the setDefault() call below has an effect.
169+ Ctor = WWMRegAlloc;
170+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
171+ }
172+ }
173+
// Factory returning the result of createBasicRegisterAllocator() called with
// the onlyAllocateSGPRs register filter.
143174static FunctionPass *createBasicSGPRRegisterAllocator () {
144175 return createBasicRegisterAllocator (onlyAllocateSGPRs);
145176}
@@ -164,6 +195,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
164195 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
165196}
166197
// Factory returning the result of createBasicRegisterAllocator() called with
// the onlyAllocateWWMRegs register filter. Registered under the name "basic"
// for WWMRegisterRegAlloc further down in this file.
198+ static FunctionPass *createBasicWWMRegisterAllocator () {
199+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
200+ }
201+
// Factory returning the result of createGreedyRegisterAllocator() called with
// the onlyAllocateWWMRegs register filter. GCNPassConfig::createWWMRegAllocPass
// falls back to this factory when Optimized is true and no allocator was
// chosen explicitly.
202+ static FunctionPass *createGreedyWWMRegisterAllocator () {
203+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
204+ }
205+
// Factory returning the result of createFastRegisterAllocator() called with
// the onlyAllocateWWMRegs register filter and `false` as the second argument.
// The VGPR counterpart in this file passes `true` in that position.
// NOTE(review): the second parameter is presumably the "clear virtual
// registers" switch of createFastRegisterAllocator -- confirm against its
// declaration; `false` would fit the VGPR allocation pass that is added
// after this one.
206+ static FunctionPass *createFastWWMRegisterAllocator () {
207+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
208+ }
209+
167210static SGPRRegisterRegAlloc basicRegAllocSGPR (
168211 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
169212static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -180,6 +223,15 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
180223
// Static VGPRRegisterRegAlloc object constructed with the name "fast", a
// description string, and createFastVGPRRegisterAllocator as its factory.
181224static VGPRRegisterRegAlloc fastRegAllocVGPR (
182225 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
226+
// Static WWMRegisterRegAlloc objects for the basic, greedy and fast
// allocators, each pairing a name and description with the matching
// create*WWMRegisterAllocator factory defined earlier in this file.
// Presumably these are the values accepted by the wwm-regalloc option, whose
// parser is RegisterPassParser<WWMRegisterRegAlloc> -- confirm against the
// registry implementation.
227+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
228+ " basic register allocator" ,
229+ createBasicWWMRegisterAllocator);
230+ static WWMRegisterRegAlloc
231+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
232+ createGreedyWWMRegisterAllocator);
233+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
234+ createFastWWMRegisterAllocator);
183235} // anonymous namespace
184236
185237static cl::opt<bool >
@@ -437,6 +489,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
437489 initializeAMDGPUSwLowerLDSLegacyPass (*PR);
438490 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
439491 initializeAMDGPULowerBufferFatPointersPass (*PR);
492+ initializeAMDGPUReserveWWMRegsPass (*PR);
440493 initializeAMDGPURewriteOutArgumentsPass (*PR);
441494 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
442495 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -995,6 +1048,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
9951048
9961049 FunctionPass *createSGPRAllocPass (bool Optimized);
9971050 FunctionPass *createVGPRAllocPass (bool Optimized);
1051+ FunctionPass *createWWMRegAllocPass (bool Optimized);
9981052 FunctionPass *createRegAllocPass (bool Optimized) override ;
9991053
10001054 bool addRegAssignAndRewriteFast () override ;
@@ -1409,7 +1463,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
14091463}
14101464
14111465bool GCNPassConfig::addPreRewrite () {
1412- addPass (&SILowerWWMCopiesID);
14131466 if (EnableRegReassign)
14141467 addPass (&GCNNSAReassignID);
14151468 return true ;
@@ -1445,12 +1498,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14451498 return createFastVGPRRegisterAllocator ();
14461499}
14471500
// Builds the register allocation pass used for WWM registers.
//
// Optimized selects the fallback when no allocator was chosen explicitly:
// true gives the greedy WWM allocator, false gives the fast WWM allocator.
//
// Returns the pass produced by the explicitly selected constructor if the
// WWMRegisterRegAlloc default differs from useDefaultRegisterAllocator;
// otherwise returns the fallback described above.
1501+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1502+ // Initialize the global default.
1503+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1504+ initializeDefaultWWMRegisterAllocatorOnce);
1505+
1506+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
// A constructor other than the marker function means an allocator was
// selected explicitly, so use it as is.
1507+ if (Ctor != useDefaultRegisterAllocator)
1508+ return Ctor ();
1509+
1510+ if (Optimized)
1511+ return createGreedyWWMRegisterAllocator ();
1512+
1513+ return createFastWWMRegisterAllocator ();
1514+ }
1515+
// Override that is never expected to be called: the body is only an
// llvm_unreachable. This class declares separate createSGPRAllocPass,
// createVGPRAllocPass and createWWMRegAllocPass members for building
// allocation passes; Optimized is ignored here.
14481516FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14491517 llvm_unreachable (" should not be used" );
14501518}
14511519
// Diagnostic text stating that -regalloc is unsupported for amdgcn and naming
// the per-register-class options to use in its place. This change extends the
// list with -wwm-regalloc; the new text is split across two adjacent string
// literals.
14521520static const char RegAllocOptNotSupportedMessage[] =
1453- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1521+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1522+ " and -vgpr-regalloc" ;
14541523
14551524bool GCNPassConfig::addRegAssignAndRewriteFast () {
14561525 if (!usingDefaultRegAlloc ())
@@ -1462,11 +1531,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14621531
14631532 // Equivalent of PEI for SGPRs.
14641533 addPass (&SILowerSGPRSpillsID);
1534+
1535+ // To Allocate wwm registers used in whole quad mode operations (for shaders).
14651536 addPass (&SIPreAllocateWWMRegsID);
14661537
1467- addPass (createVGPRAllocPass (false ));
1538+ // For allocating other wwm register operands.
1539+ addPass (createWWMRegAllocPass (false ));
14681540
14691541 addPass (&SILowerWWMCopiesID);
1542+ addPass (&AMDGPUReserveWWMRegsID);
1543+
1544+ // For allocating regular VGPRs.
1545+ addPass (createVGPRAllocPass (false ));
1546+
14701547 return true ;
14711548}
14721549
@@ -1486,8 +1563,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14861563
14871564 // Equivalent of PEI for SGPRs.
14881565 addPass (&SILowerSGPRSpillsID);
1566+
1567+ // To Allocate wwm registers used in whole quad mode operations (for shaders).
14891568 addPass (&SIPreAllocateWWMRegsID);
14901569
1570+ // For allocating other whole wave mode registers.
1571+ addPass (createWWMRegAllocPass (true ));
1572+ addPass (&SILowerWWMCopiesID);
1573+ addPass (createVirtRegRewriter (false ));
1574+ addPass (&AMDGPUReserveWWMRegsID);
1575+
1576+ // For allocating regular VGPRs.
14911577 addPass (createVGPRAllocPass (true ));
14921578
14931579 addPreRewrite ();
0 commit comments