@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108108 : RegisterRegAllocBase(N, D, C) {}
109109};
110110
111+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
112+ public:
113+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
114+ : RegisterRegAllocBase(N, D, C) {}
115+ };
116+
111117static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
112118 const MachineRegisterInfo &MRI,
113119 const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122128 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
123129}
124130
125- // / -{sgpr|vgpr}-regalloc=... command line option.
131+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
132+ const MachineRegisterInfo &MRI,
133+ const Register Reg) {
134+ const SIMachineFunctionInfo *MFI =
135+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
136+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
137+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
138+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
139+ }
140+
141+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
126142static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
127143
128144// / A dummy default pass factory indicates whether the register allocator is
129145// / overridden on the command line.
130146static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
131147static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
148+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
132149
133150static SGPRRegisterRegAlloc
134151defaultSGPRRegAlloc (" default" ,
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145162VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146163 cl::desc (" Register allocator to use for VGPRs" ));
147164
165+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
166+ RegisterPassParser<WWMRegisterRegAlloc>>
167+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
168+ cl::init (&useDefaultRegisterAllocator),
169+ cl::desc(" Register allocator to use for WWM registers" ));
148170
149171static void initializeDefaultSGPRRegisterAllocatorOnce () {
150172 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164186 }
165187}
166188
189+ static void initializeDefaultWWMRegisterAllocatorOnce () {
190+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
191+
192+ if (!Ctor) {
193+ Ctor = WWMRegAlloc;
194+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
195+ }
196+ }
197+
167198static FunctionPass *createBasicSGPRRegisterAllocator () {
168199 return createBasicRegisterAllocator (onlyAllocateSGPRs);
169200}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188219 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
189220}
190221
222+ static FunctionPass *createBasicWWMRegisterAllocator () {
223+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
224+ }
225+
226+ static FunctionPass *createGreedyWWMRegisterAllocator () {
227+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
228+ }
229+
230+ static FunctionPass *createFastWWMRegisterAllocator () {
231+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
232+ }
233+
191234static SGPRRegisterRegAlloc basicRegAllocSGPR (
192235 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
193236static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204247
205248static VGPRRegisterRegAlloc fastRegAllocVGPR (
206249 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
250+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
251+ " basic register allocator" ,
252+ createBasicWWMRegisterAllocator);
253+ static WWMRegisterRegAlloc
254+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
255+ createGreedyWWMRegisterAllocator);
256+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
257+ createFastWWMRegisterAllocator);
207258} // anonymous namespace
208259
209260static cl::opt<bool >
@@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440491 initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
441492 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
442493 initializeAMDGPULowerBufferFatPointersPass (*PR);
494+ initializeAMDGPUReserveWWMRegsPass (*PR);
443495 initializeAMDGPURewriteOutArgumentsPass (*PR);
444496 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
445497 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -989,6 +1041,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
9891041
9901042 FunctionPass *createSGPRAllocPass (bool Optimized);
9911043 FunctionPass *createVGPRAllocPass (bool Optimized);
1044+ FunctionPass *createWWMRegAllocPass (bool Optimized);
9921045 FunctionPass *createRegAllocPass (bool Optimized) override ;
9931046
9941047 bool addRegAssignAndRewriteFast () override ;
@@ -1382,7 +1435,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
13821435}
13831436
13841437bool GCNPassConfig::addPreRewrite () {
1385- addPass (&SILowerWWMCopiesID);
13861438 if (EnableRegReassign)
13871439 addPass (&GCNNSAReassignID);
13881440 return true ;
@@ -1418,12 +1470,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14181470 return createFastVGPRRegisterAllocator ();
14191471}
14201472
1473+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1474+ // Initialize the global default.
1475+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1476+ initializeDefaultWWMRegisterAllocatorOnce);
1477+
1478+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1479+ if (Ctor != useDefaultRegisterAllocator)
1480+ return Ctor ();
1481+
1482+ if (Optimized)
1483+ return createGreedyWWMRegisterAllocator ();
1484+
1485+ return createFastWWMRegisterAllocator ();
1486+ }
1487+
14211488FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14221489 llvm_unreachable (" should not be used" );
14231490}
14241491
/// Message text explaining that the generic -regalloc option is rejected for
/// amdgcn and naming the per-register-kind options to use instead.
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. "
    "Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc";
14271495
14281496bool GCNPassConfig::addRegAssignAndRewriteFast () {
14291497 if (!usingDefaultRegAlloc ())
@@ -1435,11 +1503,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14351503
14361504 // Equivalent of PEI for SGPRs.
14371505 addPass (&SILowerSGPRSpillsLegacyID);
1506+
1507+ // Allocate WWM registers used in whole quad mode operations (for shaders).
14381508 addPass (&SIPreAllocateWWMRegsID);
14391509
1440- addPass (createVGPRAllocPass (false ));
1510+ // For allocating other wwm register operands.
1511+ addPass (createWWMRegAllocPass (false ));
14411512
14421513 addPass (&SILowerWWMCopiesID);
1514+ addPass (&AMDGPUReserveWWMRegsID);
1515+
1516+ // For allocating per-thread VGPRs.
1517+ addPass (createVGPRAllocPass (false ));
1518+
14431519 return true ;
14441520}
14451521
@@ -1459,8 +1535,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14591535
14601536 // Equivalent of PEI for SGPRs.
14611537 addPass (&SILowerSGPRSpillsLegacyID);
1538+
1539+ // Allocate WWM registers used in whole quad mode operations (for shaders).
14621540 addPass (&SIPreAllocateWWMRegsID);
14631541
1542+ // For allocating other whole wave mode registers.
1543+ addPass (createWWMRegAllocPass (true ));
1544+ addPass (&SILowerWWMCopiesID);
1545+ addPass (createVirtRegRewriter (false ));
1546+ addPass (&AMDGPUReserveWWMRegsID);
1547+
1548+ // For allocating per-thread VGPRs.
14641549 addPass (createVGPRAllocPass (true ));
14651550
14661551 addPreRewrite ();
0 commit comments