|
25 | 25 | /// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 |
26 | 26 | /// %sgpr0 = SI_ELSE %sgpr0 |
27 | 27 | /// %vgpr0 = V_SUB_F32 %vgpr0, %vgpr0 |
28 | | -/// SI_END_CF %sgpr0 |
| 28 | +/// SI_WAVE_RECONVERGE %sgpr0 |
29 | 29 | /// |
30 | 30 | /// becomes: |
31 | 31 | /// |
@@ -103,10 +103,7 @@ class SILowerControlFlow : public MachineFunctionPass { |
103 | 103 | void emitWaveDiverge(MachineInstr &MI, Register EnabledLanesMask, |
104 | 104 | Register DisableLanesMask); |
105 | 105 |
|
106 | | - void emitWaveInvert(MachineInstr &MI, Register EnabledLanesMask, |
107 | | - Register DisableLanesMask); |
108 | | - |
109 | | - void emitEndCf(MachineInstr &MI); |
| 106 | + void emitWaveReconverge(MachineInstr &MI); |
110 | 107 |
|
111 | 108 | void lowerInitExec(MachineBasicBlock *MBB, MachineInstr &MI); |
112 | 109 |
|
@@ -198,7 +195,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { |
198 | 195 | void SILowerControlFlow::emitElse(MachineInstr &MI) { |
199 | 196 | Register InvCondReg = MI.getOperand(0).getReg(); |
200 | 197 | Register CondReg = MI.getOperand(1).getReg(); |
201 | | - emitWaveInvert(MI, CondReg, InvCondReg); |
| 198 | + emitWaveDiverge(MI, CondReg, InvCondReg); |
202 | 199 | } |
203 | 200 |
|
204 | 201 | void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { |
@@ -375,89 +372,7 @@ void SILowerControlFlow::emitWaveDiverge(MachineInstr &MI, |
375 | 372 | LIS->removeAllRegUnitsForPhysReg(Exec); |
376 | 373 | } |
377 | 374 |
|
378 | | -void SILowerControlFlow::emitWaveInvert(MachineInstr &MI, |
379 | | - Register EnabledLanesMask, |
380 | | - Register DisableLanesMask) { |
381 | | - MachineBasicBlock &MBB = *MI.getParent(); |
382 | | - const DebugLoc &DL = MI.getDebugLoc(); |
383 | | - MachineBasicBlock::iterator I(MI); |
384 | | - |
385 | | - MachineInstr *CondInverted = |
386 | | - BuildMI(MBB, I, DL, TII->get(XorOpc), DisableLanesMask) |
387 | | - .addReg(EnabledLanesMask) |
388 | | - .addReg(Exec); |
389 | | - |
390 | | - if (LV) { |
391 | | - LV->replaceKillInstruction(DisableLanesMask, MI, *CondInverted); |
392 | | - } |
393 | | - |
394 | | - Register TestResultReg = MRI->createVirtualRegister(BoolRC); |
395 | | - // If the EnableLanesMask is zero we have to restore the masked bits on the |
396 | | - // skip way |
397 | | - Register ExitMask = MRI->createVirtualRegister(BoolRC); |
398 | | - MachineInstr *ExitMaskSet = BuildMI(MBB, I, DL, TII->get(OrOpc), ExitMask) |
399 | | - .addReg(Exec) |
400 | | - .addReg(DisableLanesMask); |
401 | | - |
402 | | - MachineInstr *IfZeroMask = |
403 | | - BuildMI(MBB, I, DL, TII->get(AndOpc), TestResultReg) |
404 | | - .addReg(EnabledLanesMask) |
405 | | - .addImm(TestMask); |
406 | | - |
407 | | - MachineInstr *SetExecForSucc = BuildMI(MBB, I, DL, TII->get(Select), Exec) |
408 | | - .addReg(EnabledLanesMask) |
409 | | - .addReg(ExitMask); |
410 | | - |
411 | | - MachineBasicBlock *FlowBB = MI.getOperand(2).getMBB(); |
412 | | - MachineBasicBlock *TargetBB = nullptr; |
413 | | - // determine target BBs |
414 | | - I = skipToUncondBrOrEnd(MBB, I); |
415 | | - if (I != MBB.end()) { |
416 | | - // skipToUncondBrOrEnd returns either unconditional branch or end() |
417 | | - TargetBB = I->getOperand(0).getMBB(); |
418 | | - I->getOperand(0).setMBB(FlowBB); |
419 | | - } else { |
420 | | - // assert(MBB.succ_size() == 2); |
421 | | - for (auto Succ : successors(&MBB)) { |
422 | | - if (Succ != FlowBB) { |
423 | | - TargetBB = Succ; |
424 | | - break; |
425 | | - } |
426 | | - } |
427 | | - I = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_BRANCH)).addMBB(FlowBB); |
428 | | - if (LIS) |
429 | | - LIS->InsertMachineInstrInMaps(*I); |
430 | | - } |
431 | | - |
432 | | - if (TargetBB) { |
433 | | - MachineInstr *NewBr = |
434 | | - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1)).addMBB(TargetBB); |
435 | | - if (LIS) |
436 | | - LIS->InsertMachineInstrInMaps(*NewBr); |
437 | | - } |
438 | | - |
439 | | - if (!LIS) { |
440 | | - MI.eraseFromParent(); |
441 | | - return; |
442 | | - } |
443 | | - |
444 | | - LIS->InsertMachineInstrInMaps(*CondInverted); |
445 | | - LIS->InsertMachineInstrInMaps(*ExitMaskSet); |
446 | | - LIS->InsertMachineInstrInMaps(*IfZeroMask); |
447 | | - LIS->ReplaceMachineInstrInMaps(MI, *SetExecForSucc); |
448 | | - |
449 | | - RecomputeRegs.insert(MI.getOperand(0).getReg()); |
450 | | - RecomputeRegs.insert(MI.getOperand(1).getReg()); |
451 | | - |
452 | | - MI.eraseFromParent(); |
453 | | - |
454 | | - LIS->createAndComputeVirtRegInterval(TestResultReg); |
455 | | - LIS->createAndComputeVirtRegInterval(ExitMask); |
456 | | - |
457 | | - LIS->removeAllRegUnitsForPhysReg(Exec); |
458 | | -} |
459 | | - |
460 | | -void SILowerControlFlow::emitEndCf(MachineInstr &MI) { |
| 375 | +void SILowerControlFlow::emitWaveReconverge(MachineInstr &MI) { |
461 | 376 |
|
462 | 377 | MachineBasicBlock &BB = *MI.getParent(); |
463 | 378 | Register Mask = MI.getOperand(0).getReg(); |
@@ -558,8 +473,8 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) { |
558 | 473 | MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ)); |
559 | 474 | break; |
560 | 475 |
|
561 | | - case AMDGPU::SI_END_CF: |
562 | | - emitEndCf(MI); |
| 476 | + case AMDGPU::SI_WAVE_RECONVERGE: |
| 477 | + emitWaveReconverge(MI); |
563 | 478 | break; |
564 | 479 |
|
565 | 480 | default: |
@@ -762,7 +677,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { |
762 | 677 | case AMDGPU::SI_IF_BREAK: |
763 | 678 | case AMDGPU::SI_WATERFALL_LOOP: |
764 | 679 | case AMDGPU::SI_LOOP: |
765 | | - case AMDGPU::SI_END_CF: |
| 680 | + case AMDGPU::SI_WAVE_RECONVERGE: |
766 | 681 | SplitMBB = process(MI); |
767 | 682 | Changed = true; |
768 | 683 | break; |
|
0 commit comments