@@ -44,19 +44,18 @@ class SIInsertSkips : public MachineFunctionPass {
44
44
bool shouldSkip (const MachineBasicBlock &From,
45
45
const MachineBasicBlock &To) const ;
46
46
47
- bool dominatesAllReachable (MachineBasicBlock &MBB);
48
47
void ensureEarlyExitBlock (MachineBasicBlock &MBB, bool ClearExec);
49
- void skipIfDead (MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
50
- DebugLoc DL);
51
48
52
- bool kill (MachineInstr &MI);
53
49
void earlyTerm (MachineInstr &MI);
54
50
55
51
bool skipMaskBranch (MachineInstr &MI, MachineBasicBlock &MBB);
56
52
57
53
public:
58
54
static char ID;
59
55
56
+ unsigned MovOpc;
57
+ Register ExecReg;
58
+
60
59
SIInsertSkips () : MachineFunctionPass(ID) {}
61
60
62
61
bool runOnMachineFunction (MachineFunction &MF) override ;
@@ -138,15 +137,6 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
138
137
return false ;
139
138
}
140
139
141
- /// Check whether \p MBB dominates all blocks that are reachable from it.
142
- bool SIInsertSkips::dominatesAllReachable (MachineBasicBlock &MBB) {
143
- for (MachineBasicBlock *Other : depth_first (&MBB)) { // iterate the blocks produced by depth_first starting at MBB
144
- if (!MDT->dominates (&MBB, Other)) // query the dominator tree held in MDT
145
- return false ; // one visited block that MBB does not dominate is enough to fail
146
- }
147
- return true ; // every visited block is dominated by MBB
148
- }
149
-
150
140
static void generateEndPgm (MachineBasicBlock &MBB,
151
141
MachineBasicBlock::iterator I, DebugLoc DL,
152
142
const SIInstrInfo *TII, bool IsPS) {
@@ -181,11 +171,8 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
181
171
}
182
172
183
173
if (ClearExec && !EarlyExitClearsExec) {
184
- const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
185
- unsigned Mov = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
186
- Register Exec = ST.isWave32 () ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
187
174
auto ExitI = EarlyExitBlock->getFirstNonPHI ();
188
- BuildMI (*EarlyExitBlock, ExitI, DL, TII->get (Mov ), Exec ).addImm (0 );
175
+ BuildMI (*EarlyExitBlock, ExitI, DL, TII->get (MovOpc ), ExecReg ).addImm (0 );
189
176
EarlyExitClearsExec = true ;
190
177
}
191
178
}
@@ -205,175 +192,6 @@ static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
205
192
MDT->getBase ().applyUpdates (DTUpdates);
206
193
}
207
194
208
- /// Insert an "if exec=0 { null export; s_endpgm }" sequence before the given
209
- /// iterator. Only applies to pixel shaders.
210
- void SIInsertSkips::skipIfDead (MachineBasicBlock &MBB,
211
- MachineBasicBlock::iterator I, DebugLoc DL) {
212
- MachineFunction *MF = MBB.getParent ();
213
- (void )MF; // MF is only referenced by the assert below
214
- assert (MF->getFunction ().getCallingConv () == CallingConv::AMDGPU_PS); // callers must only pass blocks of AMDGPU_PS functions
215
-
216
- // It is possible for an SI_KILL_*_TERMINATOR to sit at the bottom of a
217
- // basic block that has no further successors (e.g., there was an
218
- // `unreachable` there in IR). This can happen with original source of the
219
- // form:
220
- //
221
- // if (uniform_condition) {
222
- // write_to_memory();
223
- // discard;
224
- // }
225
- //
226
- // In this case, we write the "null_export; s_endpgm" skip code in the
227
- // already-existing basic block.
228
- auto NextBBI = std::next (MBB.getIterator ()); // iterator one past MBB; NOTE(review): dereferenced below, confirm this is safe when MBB is the last block
229
- bool NoSuccessor =
230
- I == MBB.end () && !llvm::is_contained (MBB.successors (), &*NextBBI); // I is at the end of MBB and the following block is not a successor of MBB
231
-
232
- if (NoSuccessor) {
233
- generateEndPgm (MBB, I, DL, TII, true ); // last argument is IsPS
234
- } else {
235
- ensureEarlyExitBlock (MBB, false ); // second argument is ClearExec
236
-
237
- MachineInstr *BranchMI =
238
- BuildMI (MBB, I, DL, TII->get (AMDGPU::S_CBRANCH_EXECZ))
239
- .addMBB (EarlyExitBlock); // the branch targets the early-exit block
240
-
241
- // Split the block if the branch will not come at the end.
242
- auto Next = std::next (BranchMI->getIterator ());
243
- if (Next != MBB.end () && !Next->isTerminator ())
244
- splitBlock (MBB, *BranchMI, MDT);
245
-
246
- MBB.addSuccessor (EarlyExitBlock); // record the new CFG edge MBB -> EarlyExitBlock
247
- MDT->getBase ().insertEdge (&MBB, EarlyExitBlock); // mirror the same edge in the dominator tree
248
- }
249
- }
250
-
251
- /// Translate a SI_KILL_*_TERMINATOR into exec-manipulating instructions.
252
- /// Return true unless the terminator is a no-op. \p MI itself is not erased here.
253
- bool SIInsertSkips::kill (MachineInstr &MI) {
254
- MachineBasicBlock &MBB = *MI.getParent ();
255
- DebugLoc DL = MI.getDebugLoc (); // new instructions reuse MI's debug location
256
-
257
- switch (MI.getOpcode ()) {
258
- case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: {
259
- unsigned Opcode = 0 ;
260
-
261
- // The opcodes are inverted because the inline immediate has to be
262
- // the first operand, e.g. from "x < imm" to "imm > x"
263
- switch (MI.getOperand (2 ).getImm ()) { // operand 2 carries the ISD condition code
264
- case ISD::SETOEQ:
265
- case ISD::SETEQ:
266
- Opcode = AMDGPU::V_CMPX_EQ_F32_e64;
267
- break ;
268
- case ISD::SETOGT:
269
- case ISD::SETGT:
270
- Opcode = AMDGPU::V_CMPX_LT_F32_e64;
271
- break ;
272
- case ISD::SETOGE:
273
- case ISD::SETGE:
274
- Opcode = AMDGPU::V_CMPX_LE_F32_e64;
275
- break ;
276
- case ISD::SETOLT:
277
- case ISD::SETLT:
278
- Opcode = AMDGPU::V_CMPX_GT_F32_e64;
279
- break ;
280
- case ISD::SETOLE:
281
- case ISD::SETLE:
282
- Opcode = AMDGPU::V_CMPX_GE_F32_e64;
283
- break ;
284
- case ISD::SETONE:
285
- case ISD::SETNE:
286
- Opcode = AMDGPU::V_CMPX_LG_F32_e64;
287
- break ;
288
- case ISD::SETO:
289
- Opcode = AMDGPU::V_CMPX_O_F32_e64;
290
- break ;
291
- case ISD::SETUO:
292
- Opcode = AMDGPU::V_CMPX_U_F32_e64;
293
- break ;
294
- case ISD::SETUEQ:
295
- Opcode = AMDGPU::V_CMPX_NLG_F32_e64;
296
- break ;
297
- case ISD::SETUGT:
298
- Opcode = AMDGPU::V_CMPX_NGE_F32_e64;
299
- break ;
300
- case ISD::SETUGE:
301
- Opcode = AMDGPU::V_CMPX_NGT_F32_e64;
302
- break ;
303
- case ISD::SETULT:
304
- Opcode = AMDGPU::V_CMPX_NLE_F32_e64;
305
- break ;
306
- case ISD::SETULE:
307
- Opcode = AMDGPU::V_CMPX_NLT_F32_e64;
308
- break ;
309
- case ISD::SETUNE:
310
- Opcode = AMDGPU::V_CMPX_NEQ_F32_e64;
311
- break ;
312
- default :
313
- llvm_unreachable (" invalid ISD:SET cond code" );
314
- }
315
-
316
- const GCNSubtarget &ST = MBB.getParent ()->getSubtarget <GCNSubtarget>();
317
- if (ST.hasNoSdstCMPX ())
318
- Opcode = AMDGPU::getVCMPXNoSDstOp (Opcode); // remap to the no-sdst variant of the chosen opcode
319
-
320
- assert (MI.getOperand (0 ).isReg ()); // operand 0 must be a register
321
-
322
- if (TRI->isVGPR (MBB.getParent ()->getRegInfo (),
323
- MI.getOperand (0 ).getReg ())) { // VGPR operand: emit the e32 form
324
- Opcode = AMDGPU::getVOPe32 (Opcode);
325
- BuildMI (MBB, &MI, DL, TII->get (Opcode))
326
- .add (MI.getOperand (1 ))
327
- .add (MI.getOperand (0 )); // operand 1 is added first, then operand 0
328
- } else { // non-VGPR operand: keep the e64 opcode and add its operands explicitly
329
- auto I = BuildMI (MBB, &MI, DL, TII->get (Opcode));
330
- if (!ST.hasNoSdstCMPX ())
331
- I.addReg (AMDGPU::VCC, RegState::Define); // VCC def is only added when the opcode was not remapped above
332
-
333
- I.addImm (0 ) // src0 modifiers
334
- .add (MI.getOperand (1 ))
335
- .addImm (0 ) // src1 modifiers
336
- .add (MI.getOperand (0 ));
337
-
338
- I.addImm (0 ); // omod
339
- }
340
- return true ; // this case always emits an instruction
341
- }
342
- case AMDGPU::SI_KILL_I1_TERMINATOR: {
343
- const MachineFunction *MF = MI.getParent ()->getParent ();
344
- const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
345
- unsigned Exec = ST.isWave32 () ? AMDGPU::EXEC_LO : AMDGPU::EXEC; // wave32 uses EXEC_LO, otherwise EXEC
346
- const MachineOperand &Op = MI.getOperand (0 ); // operand 0: either an immediate or forwarded unchanged below
347
- int64_t KillVal = MI.getOperand (1 ).getImm (); // operand 1: the kill value
348
- assert (KillVal == 0 || KillVal == -1 );
349
-
350
- // Kill all threads if Op0 is an immediate and equal to the Kill value.
351
- if (Op.isImm ()) {
352
- int64_t Imm = Op.getImm ();
353
- assert (Imm == 0 || Imm == -1 );
354
-
355
- if (Imm == KillVal) {
356
- BuildMI (MBB, &MI, DL, TII->get (ST.isWave32 () ? AMDGPU::S_MOV_B32
357
- : AMDGPU::S_MOV_B64), Exec)
358
- .addImm (0 ); // move immediate 0 into Exec
359
- return true ;
360
- }
361
- return false ; // immediate differs from KillVal: nothing is emitted (the no-op case)
362
- }
363
-
364
- unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64; // KillVal == -1 selects ANDN2, KillVal == 0 selects AND
365
- if (ST.isWave32 ())
366
- Opcode = KillVal ? AMDGPU::S_ANDN2_B32 : AMDGPU::S_AND_B32; // same choice with the 32-bit opcodes
367
- BuildMI (MBB, &MI, DL, TII->get (Opcode), Exec)
368
- .addReg (Exec)
369
- .add (Op); // destination Exec, sources Exec and Op
370
- return true ;
371
- }
372
- default :
373
- llvm_unreachable (" invalid opcode, expected SI_KILL_*_TERMINATOR" );
374
- }
375
- }
376
-
377
195
void SIInsertSkips::earlyTerm (MachineInstr &MI) {
378
196
MachineBasicBlock &MBB = *MI.getParent ();
379
197
const DebugLoc DL = MI.getDebugLoc ();
@@ -415,7 +233,9 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
415
233
MDT = &getAnalysis<MachineDominatorTree>();
416
234
SkipThreshold = SkipThresholdFlag;
417
235
418
- SmallVector<MachineInstr *, 4 > KillInstrs;
236
+ MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
237
+ ExecReg = ST.isWave32 () ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
238
+
419
239
SmallVector<MachineInstr *, 4 > EarlyTermInstrs;
420
240
bool MadeChange = false ;
421
241
@@ -440,41 +260,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
440
260
}
441
261
break ;
442
262
443
- case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
444
- case AMDGPU::SI_KILL_I1_TERMINATOR: {
445
- MadeChange = true ;
446
- bool CanKill = kill (MI);
447
-
448
- // Check if we can add an early "if exec=0 { end shader }".
449
- //
450
- // Note that we _always_ do this if it is correct, even if the kill
451
- // happens fairly late in the shader, because the null export should
452
- // generally still be cheaper than normal export(s).
453
- //
454
- // TODO: The dominatesAllReachable check is conservative: if the
455
- // dominance is only missing due to _uniform_ branches, we could
456
- // in fact insert the early-exit as well.
457
- if (CanKill &&
458
- MF.getFunction ().getCallingConv () == CallingConv::AMDGPU_PS &&
459
- dominatesAllReachable (MBB)) {
460
- // Mark the instruction for kill-if-dead insertion. We delay this
461
- // change because it modifies the CFG.
462
- KillInstrs.push_back (&MI);
463
- } else {
464
- MI.eraseFromParent ();
465
- }
466
- break ;
467
- }
468
-
469
- case AMDGPU::SI_KILL_CLEANUP:
470
- if (MF.getFunction ().getCallingConv () == CallingConv::AMDGPU_PS &&
471
- dominatesAllReachable (MBB)) {
472
- KillInstrs.push_back (&MI);
473
- } else {
474
- MI.eraseFromParent ();
475
- }
476
- break ;
477
-
478
263
case AMDGPU::SI_EARLY_TERMINATE_SCC0:
479
264
EarlyTermInstrs.push_back (&MI);
480
265
break ;
@@ -491,12 +276,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
491
276
earlyTerm (*Instr);
492
277
Instr->eraseFromParent ();
493
278
}
494
- for (MachineInstr *Kill : KillInstrs) {
495
- skipIfDead (*Kill->getParent (), std::next (Kill->getIterator ()),
496
- Kill->getDebugLoc ());
497
- Kill->eraseFromParent ();
498
- }
499
- KillInstrs.clear ();
500
279
EarlyTermInstrs.clear ();
501
280
EarlyExitBlock = nullptr ;
502
281
0 commit comments