1- #include < llvm/CodeGen/MachineBasicBlock.h>
2- #include < llvm/CodeGen/MachineBranchProbabilityInfo.h>
3- #include < llvm/CodeGen/MachineDominators.h>
4- #include < llvm/CodeGen/MachineFunctionPass.h>
5- #include < llvm/CodeGen/MachineLoopInfo.h>
6- #include < llvm/CodeGen/SSAIfConv.h>
7- #include < llvm/CodeGen/TargetInstrInfo.h>
8- #include < llvm/CodeGen/TargetRegisterInfo.h>
9- #include < llvm/CodeGen/TargetSchedule.h>
10- #include < llvm/CodeGen/TargetSubtargetInfo.h>
11- #include < llvm/InitializePasses.h>
1+ #include " llvm/CodeGen/MachineFunctionPass.h"
2+ #include " llvm/CodeGen/MachineRegisterInfo.h"
3+ #include " llvm/CodeGen/SSAIfConv.h"
124
135#include " AMDGPU.h"
6+ #include " GCNSubtarget.h"
7+ #include " SIInstrInfo.h"
8+ #include " SIMachineFunctionInfo.h"
149
1510using namespace llvm ;
1611
17- namespace {
1812#define DEBUG_TYPE " amdgpu-if-cvt"
19- const char PassName[] = " AMDGPU if conversion" ;
20-
21- class AMDGPUIfConverter : public MachineFunctionPass {
22- const TargetInstrInfo *TII = nullptr ;
23- const TargetRegisterInfo *TRI = nullptr ;
24- TargetSchedModel SchedModel;
25- MachineRegisterInfo *MRI = nullptr ;
26- MachineDominatorTree *DomTree = nullptr ;
27- MachineBranchProbabilityInfo *MBPI = nullptr ;
28- MachineLoopInfo *Loops = nullptr ;
29-
30- static constexpr unsigned BlockInstrLimit = 30 ;
31- static constexpr bool Stress = false ;
32- SSAIfConv IfConv{DEBUG_TYPE, BlockInstrLimit, Stress};
33-
34- public:
35- static char ID;
3613
37- AMDGPUIfConverter () : MachineFunctionPass(ID) {}
14+ namespace {
15+ unsigned getReversedVCMPXOpcode (unsigned Opcode) {
16+ // TODO: this is a placeholder for the real function
17+ switch (Opcode) {
18+ case AMDGPU::V_CMPX_LT_I32_nosdst_e64:
19+ return AMDGPU::V_CMPX_GE_I32_nosdst_e64;
20+ default :
21+ errs () << " unhandled: " << Opcode << " \n " ;
22+ llvm_unreachable (" unhandled vcmp opcode" );
23+ }
24+ }
3825
39- bool runOnMachineFunction (MachineFunction &MF) override ;
26+ bool needsExecPredication (const SIInstrInfo *TII, const MachineInstr &I) {
27+ return TII->isVALU (I) || TII->isVMEM (I);
28+ }
4029
41- void getAnalysisUsage (AnalysisUsage &AU) const override ;
30+ struct ExecPredicate : SSAIfConv::PredicationStrategyBase {
31+ const SIInstrInfo *TII;
32+ const SIRegisterInfo *RegInfo;
33+
34+ MachineInstr *Cmp = nullptr ;
35+
36+ ExecPredicate (const SIInstrInfo *TII)
37+ : TII(TII), RegInfo(&TII->getRegisterInfo ()) {}
38+
39+ bool canConvertIf (MachineBasicBlock *Head, MachineBasicBlock *TBB,
40+ MachineBasicBlock *FBB, MachineBasicBlock *Tail,
41+ ArrayRef<MachineOperand> Cond) override {
42+
43+ // check that the cmp is just before the branch and that it is promotable to
44+ // v_cmpx
45+ const unsigned SupportedBranchOpc[]{
46+ AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ,
47+ AMDGPU::S_CBRANCH_VCCZ};
4248
43- bool tryConvertIf (MachineBasicBlock *);
49+ MachineInstr &CBranch = *Head->getFirstInstrTerminator ();
50+ if (!llvm::is_contained (SupportedBranchOpc, CBranch.getOpcode ()))
51+ return false ;
52+
53+ auto CmpInstr = std::next (CBranch.getReverseIterator ());
54+ if (CmpInstr == Head->instr_rend ())
55+ return false ;
56+
57+ Register SCCorVCC = Cond[1 ].getReg ();
58+ bool ModifiesConditionReg = CmpInstr->modifiesRegister (SCCorVCC, RegInfo);
59+ if (!ModifiesConditionReg)
60+ return false ;
61+
62+ Cmp = &*CmpInstr;
63+
64+ unsigned CmpOpc = Cmp->getOpcode ();
65+ if (TII->isSALU (*Cmp))
66+ CmpOpc = TII->getVALUOp (*Cmp);
67+ if (AMDGPU::getVCMPXOpFromVCMP (CmpOpc) == -1 ) {
68+ errs () << " unhandled branch " << *Cmp << " \n " ;
69+ return false ;
70+ }
71+
72+ return true ;
73+ }
74+
75+ bool canPredicateInstr (const MachineInstr &I) override {
76+
77+ // TODO: relax this condition, if exec is masked, check that it goes back to
78+ // normal
79+ // TODO: what about scc or vcc ? Are they taken into acount in the MBB
80+ // live-ins ?
81+ MCRegister Exec = RegInfo->getExec ();
82+ bool ModifiesExec = I.modifiesRegister (Exec, RegInfo);
83+ if (ModifiesExec)
84+ return false ;
85+
86+ if (needsExecPredication (TII, I))
87+ return true ;
88+
89+ bool DontMoveAcrossStore = true ;
90+ bool IsSpeculatable = I.isDereferenceableInvariantLoad () ||
91+ I.isSafeToMove (DontMoveAcrossStore);
92+ if (IsSpeculatable)
93+ return true ;
94+
95+ return false ;
96+ }
4497
98+ bool shouldConvertIf (SSAIfConv &IfConv) override {
99+ // TODO: cost model
100+ return true ;
101+ }
102+
103+ void predicateBlock (MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond,
104+ bool Reverse) override {
105+ // save exec
106+ MachineFunction &MF = *MBB->getParent ();
107+ SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
108+
109+ Register ExecBackup = MFI->getSGPRForEXECCopy ();
110+
111+ const DebugLoc &CmpLoc = Cmp->getDebugLoc ();
112+
113+ auto FirstInstruction = MBB->begin ();
114+ const bool IsSCCLive =
115+ false ; // asume not since the live-ins are supposed to be empty
116+ TII->insertScratchExecCopy (MF, *MBB, FirstInstruction, CmpLoc, ExecBackup,
117+ IsSCCLive);
118+
119+ // mask exec
120+ unsigned CmpOpc = Cmp->getOpcode ();
121+ if (TII->isSALU (*Cmp))
122+ CmpOpc = TII->getVALUOp (*Cmp);
123+
124+ CmpOpc = AMDGPU::getVCMPXOpFromVCMP (CmpOpc);
125+ if (Reverse)
126+ CmpOpc = getReversedVCMPXOpcode (CmpOpc);
127+
128+ // TODO: handle this properly. The second block may kill those registers.
129+ Cmp->getOperand (0 ).setIsKill (false );
130+ Cmp->getOperand (1 ).setIsKill (false );
131+
132+ auto VCmpX = BuildMI (*MBB, FirstInstruction, CmpLoc, TII->get (CmpOpc));
133+ VCmpX->addOperand (Cmp->getOperand (0 ));
134+ VCmpX->addOperand (Cmp->getOperand (1 ));
135+
136+ // restore exec
137+ TII->restoreExec (MF, *MBB, MBB->end (), DebugLoc (), ExecBackup);
138+ }
139+
140+ ~ExecPredicate () override = default ;
141+ };
142+
143+ const char PassName[] = " AMDGPU If Conversion" ;
144+
145+ struct AMDGPUIfConverter : MachineFunctionPass {
146+ static char ID;
147+ AMDGPUIfConverter () : MachineFunctionPass(ID) {}
148+ void getAnalysisUsage (AnalysisUsage &AU) const override ;
149+ bool runOnMachineFunction (MachineFunction &MF) override ;
45150 StringRef getPassName () const override { return PassName; }
46151};
47152
48153char AMDGPUIfConverter::ID = 0 ;
49154
50155void AMDGPUIfConverter::getAnalysisUsage (AnalysisUsage &AU) const {
51- AU.addRequired <MachineBranchProbabilityInfoWrapperPass>();
52156 AU.addRequired <MachineDominatorTreeWrapperPass>();
53157 AU.addPreserved <MachineDominatorTreeWrapperPass>();
54158 AU.addRequired <MachineLoopInfoWrapperPass>();
@@ -60,29 +164,19 @@ bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) {
60164 if (skipFunction (MF.getFunction ()))
61165 return false ;
62166
63- const TargetSubtargetInfo &STI = MF.getSubtarget ();
64- TII = STI.getInstrInfo ();
65- TRI = STI.getRegisterInfo ();
66- MRI = &MF.getRegInfo ();
67- SchedModel.init (&STI);
68- DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree ();
69- Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
70- MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI ();
71-
72- bool Changed = false ;
73- IfConv.runOnMachineFunction (MF);
167+ const auto &STI = MF.getSubtarget <GCNSubtarget>();
168+ if (!STI.hasGFX10_3Insts ())
169+ return false ;
74170
75- for ( auto *DomNode : post_order (DomTree))
76- if ( tryConvertIf (DomNode-> getBlock ()))
77- Changed = true ;
171+ const SIInstrInfo *TII = STI. getInstrInfo ();
172+ auto *DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>(). getDomTree ();
173+ auto *Loops = &getAnalysis<MachineLoopInfoWrapperPass>(). getLI () ;
78174
79- return Changed;
175+ ExecPredicate Predicate (TII);
176+ SSAIfConv IfConv (Predicate, MF, DomTree, Loops);
177+ return IfConv.run ();
80178}
81-
82- bool AMDGPUIfConverter::tryConvertIf (MachineBasicBlock *MBB) { return false ; }
83-
84179} // namespace
85-
86180char &llvm::AMDGPUIfConverterID = AMDGPUIfConverter::ID;
87181INITIALIZE_PASS_BEGIN (AMDGPUIfConverter, DEBUG_TYPE, PassName, false , false )
88182INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
0 commit comments