Skip to content

Commit f10db27

Browse files
committed
Simplify logic
1 parent 3d25933 commit f10db27

File tree

2 files changed

+67
-47
lines changed

2 files changed

+67
-47
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 57 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,59 +1330,69 @@ bool RegisterCoalescer::reMaterializeDef(const CoalescerPair &CP,
13301330
// the same basic block to enable better coalescing.
13311331
if (DstReg.isPhysical()) {
13321332
MachineBasicBlock *MBB = CopyMI->getParent();
1333-
if (DefMI->getParent() == MBB) {
1334-
// Check if there's already an identical instruction before CopyMI
1335-
// If so, allow rematerialization to avoid redundant instructions
1336-
bool FoundCopy = false;
1337-
for (MachineInstr &MI : *MBB) {
1338-
if (&MI == CopyMI) {
1339-
FoundCopy = true;
1340-
continue;
1341-
}
1333+
if (DefMI->getParent() == MBB && !MBB->empty()) {
1334+
// Quick check: is the last instruction a return using DstReg?
1335+
const MachineInstr &LastInstr = MBB->back();
1336+
if (LastInstr.isReturn() && LastInstr.readsRegister(DstReg, TRI)) {
1337+
// DstReg is read by the block's return instruction; decide whether remat is still allowed
1338+
1339+
// Exception: allow rematerialization for zero-idiom instructions
1340+
// (e.g., xorps %xmm0, %xmm0) because rematerialization produces
1341+
// independent zero-latency instructions, which is better than copying
1342+
const TargetSubtargetInfo &STI = MF->getSubtarget();
1343+
APInt Mask;
1344+
if (STI.isZeroIdiom(DefMI, Mask)) {
1345+
LLVM_DEBUG(dbgs() << "\tAllow remat: zero-idiom instruction\n");
1346+
} else {
1347+
// Check for another instruction identical to DefMI (ignoring defs) before CopyMI
1348+
bool HasDuplicateDef = false;
1349+
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != CopyMI;
1350+
++I) {
1351+
if (&*I != DefMI &&
1352+
I->isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1353+
HasDuplicateDef = true;
1354+
break;
1355+
}
1356+
}
13421357

1343-
// Before CopyMI: check for duplicate instructions
1344-
if (!FoundCopy && &MI != DefMI &&
1345-
MI.isIdenticalTo(*DefMI, MachineInstr::IgnoreDefs)) {
1346-
break; // Found duplicate, allow rematerialization
1347-
} else if (FoundCopy) {
1348-
// After CopyMI: check if used as return register
1349-
// If the register is redefined, it's not a return register
1350-
if (MI.modifiesRegister(DstReg, TRI))
1351-
break;
1352-
// If there's a return instruction that uses this register, skip remat
1353-
if (MI.isReturn() && MI.readsRegister(DstReg, TRI)) {
1354-
// Exception: if DefMI is moving a constant and SrcReg has no other
1355-
// uses (besides copies), rematerialization is beneficial to
1356-
// eliminate the def
1357-
if (DefMI->isMoveImmediate()) {
1358-
// Quick check: if there's only one use and it's this copy,
1359-
// definitely remat
1360-
if (MRI->hasOneNonDBGUse(SrcReg)) {
1361-
LLVM_DEBUG(dbgs()
1362-
<< "\tAllow remat: single use constant move\n");
1363-
break;
1364-
}
1358+
// Check if DstReg is redefined after CopyMI, scanning no further than the first return
1359+
bool RegRedefinedAfterCopy = false;
1360+
for (MachineBasicBlock::iterator I = std::next(CopyMI->getIterator());
1361+
I != MBB->end(); ++I) {
1362+
if (I->modifiesRegister(DstReg, TRI)) {
1363+
RegRedefinedAfterCopy = true;
1364+
break;
1365+
}
1366+
if (I->isReturn())
1367+
break;
1368+
}
13651369

1366-
// Check all uses to see if they're all copies
1367-
bool OnlyUsedByCopies = true;
1368-
unsigned UseCount = 0;
1369-
for (const MachineOperand &MO : MRI->use_operands(SrcReg)) {
1370-
const MachineInstr *UseMI = MO.getParent();
1371-
if (!UseMI->isCopy() && !UseMI->isSubregToReg()) {
1372-
OnlyUsedByCopies = false;
1373-
break;
1370+
// Skip remat only if: no duplicate def AND reg not redefined
1371+
if (!HasDuplicateDef && !RegRedefinedAfterCopy) {
1372+
// Exception: allow remat for an immediate move with a single non-debug use, or used only by COPY/SUBREG_TO_REG
1373+
if (DefMI->isMoveImmediate()) {
1374+
if (!MRI->hasOneNonDBGUse(SrcReg)) {
1375+
// Check if all uses are copies
1376+
bool OnlyUsedByCopies = true;
1377+
for (const MachineOperand &MO : MRI->use_operands(SrcReg)) {
1378+
const MachineInstr *UseMI = MO.getParent();
1379+
if (!UseMI->isCopy() && !UseMI->isSubregToReg()) {
1380+
OnlyUsedByCopies = false;
1381+
break;
1382+
}
13741383
}
1375-
UseCount++;
1376-
}
13771384

1378-
if (OnlyUsedByCopies && UseCount > 0) {
1379-
break;
1385+
if (!OnlyUsedByCopies || MRI->use_empty(SrcReg)) {
1386+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1387+
<< printReg(DstReg, TRI) << '\n');
1388+
return false;
1389+
}
13801390
}
1391+
} else {
1392+
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1393+
<< printReg(DstReg, TRI) << '\n');
1394+
return false;
13811395
}
1382-
1383-
LLVM_DEBUG(dbgs() << "\tSkip remat for return register: "
1384-
<< printReg(DstReg, TRI) << '\n');
1385-
return false;
13861396
}
13871397
}
13881398
}

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,21 @@ define i64 @test_s_wqm_constant_i64() {
9696
}
9797

9898
define i64 @test_s_wqm_constant_zero_i64() {
99+
; GFX11-LABEL: test_s_wqm_constant_zero_i64:
100+
; GFX11: ; %bb.0:
101+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102+
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
103+
; GFX11-NEXT: s_setpc_b64 s[30:31]
99104
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 0)
100105
ret i64 %br
101106
}
102107

103108
define i64 @test_s_wqm_constant_neg_one_i64() {
109+
; GFX11-LABEL: test_s_wqm_constant_neg_one_i64:
110+
; GFX11: ; %bb.0:
111+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112+
; GFX11-NEXT: v_dual_mov_b32 v0, -1 :: v_dual_mov_b32 v1, -1
113+
; GFX11-NEXT: s_setpc_b64 s[30:31]
104114
%br = call i64 @llvm.amdgcn.s.wqm.i64(i64 -1)
105115
ret i64 %br
106116
}

0 commit comments

Comments
 (0)