Skip to content

Commit 9ed3ad2

Browse files
committed
[AMDGPU][SIInsertWaitCnts] move vmem access check and apply waitcnt
1 parent 80a9c24 commit 9ed3ad2

File tree

1 file changed

+16
-15
lines changed

1 file changed

+16
-15
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -637,7 +637,7 @@ class WaitcntBrackets {
637637
void simplifyWaitcnt(InstCounterType T, unsigned &Count) const;
638638
bool hasRedundantXCntWithKmCnt(const AMDGPU::Waitcnt &Wait);
639639
bool canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait);
640-
void simplifyXcnt(AMDGPU::Waitcnt &Wait);
640+
void simplifyXcnt(AMDGPU::Waitcnt &CheckWait, AMDGPU::Waitcnt &UpdateWait);
641641

642642
void determineWait(InstCounterType T, RegInterval Interval,
643643
AMDGPU::Waitcnt &Wait) const;
@@ -1202,7 +1202,7 @@ void WaitcntBrackets::simplifyWaitcnt(AMDGPU::Waitcnt &Wait) {
12021202
simplifyWaitcnt(SAMPLE_CNT, Wait.SampleCnt);
12031203
simplifyWaitcnt(BVH_CNT, Wait.BvhCnt);
12041204
simplifyWaitcnt(KM_CNT, Wait.KmCnt);
1205-
simplifyXcnt(Wait);
1205+
simplifyXcnt(Wait, Wait);
12061206
}
12071207

12081208
void WaitcntBrackets::simplifyWaitcnt(InstCounterType T,
@@ -1304,23 +1304,23 @@ bool WaitcntBrackets::canOptimizeXCntWithLoadCnt(const AMDGPU::Waitcnt &Wait) {
13041304
!hasPendingEvent(STORE_CNT) && !hasPendingEvent(SMEM_GROUP);
13051305
}
13061306

1307-
void WaitcntBrackets::simplifyXcnt(AMDGPU::Waitcnt &Wait) {
1307+
void WaitcntBrackets::simplifyXcnt(AMDGPU::Waitcnt &CheckWait, AMDGPU::Waitcnt &UpdateWait) {
13081308
// Try to simplify xcnt further by checking for joint kmcnt and loadcnt
13091309
// optimizations. On entry to a block with multiple predecessors, there may
13101310
// be pending SMEM and VMEM events active at the same time.
13111311
// In such cases, only clear one active event at a time.
1312-
if (hasRedundantXCntWithKmCnt(Wait)) {
1312+
if (hasRedundantXCntWithKmCnt(CheckWait)) {
13131313
if (hasPendingEvent(VMEM_GROUP)) {
13141314
// Only clear the SMEM_GROUP event, but VMEM_GROUP could still require
13151315
// handling.
13161316
PendingEvents &= ~(1 << SMEM_GROUP);
13171317
} else {
13181318
applyWaitcnt(X_CNT, 0);
13191319
}
1320-
} else if (canOptimizeXCntWithLoadCnt(Wait)) {
1321-
applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
1320+
} else if (canOptimizeXCntWithLoadCnt(CheckWait)) {
1321+
applyWaitcnt(X_CNT, std::min(CheckWait.XCnt, CheckWait.LoadCnt));
13221322
}
1323-
simplifyWaitcnt(X_CNT, Wait.XCnt);
1323+
simplifyWaitcnt(X_CNT, UpdateWait.XCnt);
13241324
}
13251325

13261326
// Where there are multiple types of event in the bracket of a counter,
@@ -1752,7 +1752,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
17521752
ScoreBrackets.canOptimizeXCntWithLoadCnt(PreCombine))) {
17531753
// Xcnt may need to be updated depending on a pre-existing KM/LOAD_CNT
17541754
// due to taking the backedge of a block.
1755-
ScoreBrackets.simplifyXcnt(PreCombine);
1755+
ScoreBrackets.simplifyXcnt(PreCombine, Wait);
17561756
}
17571757
if (!WaitInstrs[CT])
17581758
continue;
@@ -2100,6 +2100,14 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
21002100
// Verify that the wait is actually needed.
21012101
ScoreBrackets.simplifyWaitcnt(Wait);
21022102

2103+
// Since the translation for VMEM addresses occurs in order, we can apply the
2104+
// XCnt if the current instruction is of VMEM type and has a memory
2105+
// dependency with another VMEM instruction in flight.
2106+
if (Wait.XCnt != ~0u && isVmemAccess(MI)) {
2107+
ScoreBrackets.applyWaitcnt(X_CNT, Wait.XCnt);
2108+
Wait.XCnt = ~0u;
2109+
}
2110+
21032111
// When forcing emit, we need to skip terminators because that would break the
21042112
// terminators of the MBB if we emit a waitcnt between terminators.
21052113
if (ForceEmitZeroFlag && !MI.isTerminator())
@@ -2168,13 +2176,6 @@ bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait,
21682176
<< "Update Instr: " << *It);
21692177
}
21702178

2171-
// Since the translation for VMEM addresses occur in-order, we can skip the
2172-
// XCnt if the current instruction is of VMEM type and has a memory
2173-
// dependency with another VMEM instruction in flight.
2174-
if (Wait.XCnt != ~0u && isVmemAccess(*It)) {
2175-
Wait.XCnt = ~0u;
2176-
}
2177-
21782179
if (WCG->createNewWaitcnt(Block, It, Wait))
21792180
Modified = true;
21802181

0 commit comments

Comments
 (0)