Skip to content

Commit a1c8750

Browse files
jayfoadgit-crd
authored and committed
[AMDGPU] Fix missing S_WAIT_XCNT with multiple pending VMEMs (llvm#166779)
1 parent 309b078 commit a1c8750

File tree

2 files changed

+11
-17
lines changed

2 files changed

+11
-17
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,21 +1291,15 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
12911291
// On entry to a block with multiple predecessors, there may
12921292
// be pending SMEM and VMEM events active at the same time.
12931293
// In such cases, only clear one active event at a time.
1294-
auto applyPendingXcntGroup = [this](unsigned E) {
1295-
unsigned LowerBound = getScoreLB(X_CNT);
1296-
applyWaitcnt(X_CNT, 0);
1297-
PendingEvents |= (1 << E);
1298-
setScoreLB(X_CNT, LowerBound);
1299-
};
13001294

13011295
// Wait on XCNT is redundant if we are already waiting for a load to complete.
13021296
// SMEM can return out of order, so only omit XCNT wait if we are waiting till
13031297
// zero.
13041298
if (Wait.KmCnt == 0 && hasPendingEvent(SMEM_GROUP)) {
1305-
if (hasPendingEvent(VMEM_GROUP))
1306-
applyPendingXcntGroup(VMEM_GROUP);
1307-
else
1299+
if (!hasMixedPendingEvents(X_CNT))
13081300
applyWaitcnt(X_CNT, 0);
1301+
else
1302+
PendingEvents &= ~(1 << SMEM_GROUP);
13091303
return;
13101304
}
13111305

@@ -1314,10 +1308,10 @@ void WaitcntBrackets::applyXcnt(const AMDGPU::Waitcnt &Wait) {
13141308
// decremented to the same number as LOADCnt.
13151309
if (Wait.LoadCnt != ~0u && hasPendingEvent(VMEM_GROUP) &&
13161310
!hasPendingEvent(STORE_CNT)) {
1317-
if (hasPendingEvent(SMEM_GROUP))
1318-
applyPendingXcntGroup(SMEM_GROUP);
1319-
else
1311+
if (!hasMixedPendingEvents(X_CNT))
13201312
applyWaitcnt(X_CNT, std::min(Wait.XCnt, Wait.LoadCnt));
1313+
else if (Wait.LoadCnt == 0)
1314+
PendingEvents &= ~(1 << VMEM_GROUP);
13211315
return;
13221316
}
13231317

llvm/test/CodeGen/AMDGPU/wait-xcnt.mir

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,6 @@ body: |
10691069
$sgpr0 = S_MOV_B32 $sgpr0
10701070
...
10711071

1072-
# FIXME: Missing S_WAIT_XCNT before overwriting vgpr0.
10731072
---
10741073
name: mixed_pending_events
10751074
tracksRegLiveness: true
@@ -1088,8 +1087,8 @@ body: |
10881087
; GCN-NEXT: successors: %bb.2(0x80000000)
10891088
; GCN-NEXT: liveins: $vgpr0_vgpr1, $sgpr2
10901089
; GCN-NEXT: {{ $}}
1091-
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1092-
; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1090+
; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
1091+
; GCN-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
10931092
; GCN-NEXT: {{ $}}
10941093
; GCN-NEXT: bb.2:
10951094
; GCN-NEXT: liveins: $sgpr2, $vgpr2
@@ -1098,15 +1097,16 @@ body: |
10981097
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec
10991098
; GCN-NEXT: S_WAIT_KMCNT 0
11001099
; GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr2
1100+
; GCN-NEXT: S_WAIT_XCNT 0
11011101
; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
11021102
bb.0:
11031103
liveins: $vgpr0_vgpr1, $sgpr0_sgpr1, $scc
11041104
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
11051105
S_CBRANCH_SCC1 %bb.2, implicit $scc
11061106
bb.1:
11071107
liveins: $vgpr0_vgpr1, $sgpr2
1108-
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1109-
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec
1108+
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 100, 0, implicit $exec
1109+
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 200, 0, implicit $exec
11101110
bb.2:
11111111
liveins: $sgpr2, $vgpr2
11121112
$vgpr2 = V_MOV_B32_e32 $vgpr2, implicit $exec

0 commit comments

Comments
 (0)