|
24 | 24 | #include "GCNSubtarget.h" |
25 | 25 | #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
26 | 26 | #include "llvm/ADT/MapVector.h" |
| 27 | +#include "llvm/ADT/STLExtras.h" |
27 | 28 | #include "llvm/ADT/Statistic.h" |
28 | 29 | #include "llvm/CodeGen/MachineFunctionPass.h" |
29 | 30 | #include <optional> |
@@ -66,6 +67,7 @@ class SIPeepholeSDWA { |
66 | 67 | MachineInstr *createSDWAVersion(MachineInstr &MI); |
67 | 68 | bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); |
68 | 69 | void legalizeScalarOperands(MachineInstr &MI, const GCNSubtarget &ST) const; |
| 70 | + bool strengthReduceCSelect64(MachineFunction &MF); |
69 | 71 |
|
70 | 72 | public: |
71 | 73 | bool run(MachineFunction &MF); |
@@ -1362,6 +1364,40 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, |
1362 | 1364 | } |
1363 | 1365 | } |
1364 | 1366 |
|
| 1367 | +bool SIPeepholeSDWA::strengthReduceCSelect64(MachineFunction &MF) { |
| 1368 | + bool Changed = false; |
| 1369 | + |
| 1370 | + for (MachineBasicBlock &MBB : MF) |
| 1371 | + for (MachineInstr &MI : make_early_inc_range(MBB)) { |
| 1372 | + if (MI.getOpcode() != AMDGPU::S_CSELECT_B64) |
| 1373 | + continue; |
| 1374 | + |
| 1375 | + Register Reg = MI.getOperand(0).getReg(); |
| 1376 | + MachineInstr *MustBeVCNDMASK = MRI->getOneNonDBGUser(Reg); |
| 1377 | + if (!MustBeVCNDMASK || |
| 1378 | + MustBeVCNDMASK->getOpcode() != AMDGPU::V_CNDMASK_B32_e64 || |
| 1379 | + !MustBeVCNDMASK->getOperand(1).isImm() || |
| 1380 | + !MustBeVCNDMASK->getOperand(2).isImm()) |
| 1381 | + continue; |
| 1382 | + |
| 1383 | + MachineInstr *MustBeVREADFIRSTLANE = |
| 1384 | + MRI->getOneNonDBGUser(MustBeVCNDMASK->getOperand(0).getReg()); |
| 1385 | + if (!MustBeVREADFIRSTLANE || |
| 1386 | + MustBeVREADFIRSTLANE->getOpcode() != AMDGPU::V_READFIRSTLANE_B32) |
| 1387 | + continue; |
| 1388 | + |
| 1389 | + BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::S_CSELECT_B32), |
| 1390 | + MustBeVREADFIRSTLANE->getOperand(0).getReg()) |
| 1391 | + .addImm(MI.getOperand(1).getImm()) |
| 1392 | + .addImm(MI.getOperand(2).getImm()) |
| 1393 | + .addReg(AMDGPU::SCC, RegState::Implicit); |
| 1394 | + |
| 1395 | + MustBeVREADFIRSTLANE->eraseFromParent(); |
| 1396 | + } |
| 1397 | + |
| 1398 | + return Changed; |
| 1399 | +} |
| 1400 | + |
1365 | 1401 | bool SIPeepholeSDWALegacy::runOnMachineFunction(MachineFunction &MF) { |
1366 | 1402 | if (skipFunction(MF.getFunction())) |
1367 | 1403 | return false; |
@@ -1436,6 +1472,9 @@ bool SIPeepholeSDWA::run(MachineFunction &MF) { |
1436 | 1472 | } while (Changed); |
1437 | 1473 | } |
1438 | 1474 |
|
| 1475 | + // Other target-specific SSA-form peephole optimizations |
| 1476 | + Ret |= strengthReduceCSelect64(MF); |
| 1477 | + |
1439 | 1478 | return Ret; |
1440 | 1479 | } |
1441 | 1480 |
|
|
0 commit comments