Skip to content

Commit c1b5840

Browse files
committed
Rebase
1 parent c72c865 commit c1b5840

File tree

2 files changed: +4 -3 lines changed

2 files changed: +4 -3 lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1092,12 +1092,13 @@ void WaitcntBrackets::updateByEvent(WaitEventType E, MachineInstr &Inst) {
1092 1092   }
1093 1093   }
1094 1094   }
1095      - if (Slot || LDSDMAStores.size() == NUM_LDSDMA - 1)
     1095 + if (Slot)
1096 1096   break;
1097 1097   // The slot may not be valid because it can be >= NUM_LDSDMA which
1098 1098   // means the scoreboard cannot track it. We still want to preserve the
1099 1099   // MI in order to check alias information, though.
1100 1100   LDSDMAStores.push_back(&Inst);
     1101 + Slot = LDSDMAStores.size();
1101 1102   break;
1102 1103   }
1103 1104   setVMemScore(LDSDMA_BEGIN, T, CurrScore);

llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
221 221   ; GFX9-NEXT: s_waitcnt vmcnt(2)
222 222   ; GFX9-NEXT: ds_read_b32 v7, v9 offset:1792
223 223   ; GFX9-NEXT: ; wave barrier
224     - ; GFX9-NEXT: s_waitcnt vmcnt(0)
    224 + ; GFX9-NEXT: s_waitcnt vmcnt(1)
225 225   ; GFX9-NEXT: ds_read_b32 v8, v9 offset:2048
226 226   ; GFX9-NEXT: ; wave barrier
227 227   ; GFX9-NEXT: s_waitcnt vmcnt(0)
@@ -288,7 +288,7 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
288 288   ; GFX10-NEXT: s_waitcnt vmcnt(2)
289 289   ; GFX10-NEXT: ds_read_b32 v7, v9 offset:1792
290 290   ; GFX10-NEXT: ; wave barrier
291     - ; GFX10-NEXT: s_waitcnt vmcnt(0)
    291 + ; GFX10-NEXT: s_waitcnt vmcnt(1)
292 292   ; GFX10-NEXT: ds_read_b32 v8, v9 offset:2048
293 293   ; GFX10-NEXT: ; wave barrier
294 294   ; GFX10-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)