Skip to content

Commit 5900729

Browse files
committed
[AMDGPU] Skip non-first terminators when forcing emit zero flag
1 parent 0b6952a commit 5900729

File tree

2 files changed

+36
-1
lines changed

2 files changed

+36
-1
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,17 @@ static bool callWaitsOnFunctionReturn(const MachineInstr &MI) {
16001600
return true;
16011601
}
16021602

1603+
/// \returns true if \p MI is one of the terminators of its parent MBB but is not the one returned by getFirstTerminator(); false if it is the first terminator or is not found among the block's terminators.
1604+
static bool checkIfMBBNonFirstTerminator(const MachineInstr &MI) {
1605+
const auto &MBB = MI.getParent();
1606+
if (MBB->getFirstTerminator() == MI) // MI is the first terminator of the block.
1607+
return false;
1608+
for (const auto &I : MBB->terminators())
1609+
if (&I == &MI) // Address match: MI is a terminator, and not the first one.
1610+
return true;
1611+
return false; // MI was not found among this block's terminators.
1612+
}
1613+
16031614
/// Generate s_waitcnt instruction to be placed before cur_Inst.
16041615
/// Instructions of a given type are returned in order,
16051616
/// but instructions of different types can complete out of order.
@@ -1825,7 +1836,9 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
18251836
// Verify that the wait is actually needed.
18261837
ScoreBrackets.simplifyWaitcnt(Wait);
18271838

1828-
if (ForceEmitZeroFlag)
1839+
// When forcing emit, skip non-first terminators of an MBB: a wait inserted
1840+
// before one would land between terminators and break the block's terminator sequence.
1841+
if (ForceEmitZeroFlag && !checkIfMBBNonFirstTerminator(MI))
18291842
Wait = WCG->getAllZeroWaitcnt(/*IncludeVSCnt=*/false);
18301843

18311844
if (ForceEmitWaitcnt[LOAD_CNT])
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-insert-waitcnts -amdgpu-waitcnt-forcezero=1 %s -o - | FileCheck %s
2+
3+
...
4+
5+
# CHECK-LABEL: waitcnt-debug-non-first-terminators
6+
# CHECK: S_WAITCNT 0
7+
# CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc
8+
# CHECK-NEXT: S_BRANCH %bb.2, implicit $scc
9+
10+
name: waitcnt-debug-non-first-terminators
11+
liveins:
12+
machineFunctionInfo:
13+
isEntryFunction: true
14+
body: |
15+
bb.0:
16+
S_CBRANCH_SCC1 %bb.1, implicit $scc
17+
S_BRANCH %bb.2, implicit $scc
18+
bb.1:
19+
S_NOP 0
20+
bb.2:
21+
S_NOP 0
22+
...

0 commit comments

Comments
 (0)