Skip to content

Commit 1a77571

Browse files
committed
Comments
1 parent 96be4e9 commit 1a77571

File tree

2 files changed

+7
-3
lines changed

2 files changed

+7
-3
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,8 @@ class WaitcntBrackets {
792792
auto STy = getSgprScoresIdx(T);
793793
for (MCRegUnit RU : regunits(Reg))
794794
SGPRs[RU].Scores[STy] = Val;
795+
} else {
796+
llvm_unreachable("Register cannot be tracked/unknown register!");
795797
}
796798
}
797799

@@ -815,7 +817,7 @@ class WaitcntBrackets {
815817
// The score tracking logic is fragmented as follows:
816818
// - VMem: VGPR RegUnits and LDS DMA IDs, see the VMEMID encoding.
817819
// - SGPRs: SGPR RegUnits
818-
// - SCC
820+
// - SCC: Non-allocatable and not general purpose: not a SGPR.
819821
//
820822
// For the VMem case, if the key is within the range of LDS DMA IDs,
821823
// then the corresponding index into the `LDSDMAStores` vector below is:

llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,6 @@ main_body:
155155
ret void
156156
}
157157

158-
; There are 8 pseudo registers defined to track LDS DMA dependencies.
159-
160158
define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, ptr addrspace(1) %out) {
161159
; GFX9-LABEL: buffer_load_lds_dword_10_arrays:
162160
; GFX9: ; %bb.0: ; %main_body
@@ -222,8 +220,10 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
222220
; GFX9-NEXT: s_waitcnt vmcnt(2)
223221
; GFX9-NEXT: ds_read_b32 v7, v9 offset:1792
224222
; GFX9-NEXT: ; wave barrier
223+
; GFX9-NEXT: s_waitcnt vmcnt(1)
225224
; GFX9-NEXT: ds_read_b32 v8, v9 offset:2048
226225
; GFX9-NEXT: ; wave barrier
226+
; GFX9-NEXT: s_waitcnt vmcnt(0)
227227
; GFX9-NEXT: ds_read_b32 v9, v9 offset:2304
228228
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
229229
; GFX9-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -287,8 +287,10 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
287287
; GFX10-NEXT: s_waitcnt vmcnt(2)
288288
; GFX10-NEXT: ds_read_b32 v7, v9 offset:1792
289289
; GFX10-NEXT: ; wave barrier
290+
; GFX10-NEXT: s_waitcnt vmcnt(1)
290291
; GFX10-NEXT: ds_read_b32 v8, v9 offset:2048
291292
; GFX10-NEXT: ; wave barrier
293+
; GFX10-NEXT: s_waitcnt vmcnt(0)
292294
; GFX10-NEXT: ds_read_b32 v9, v9 offset:2304
293295
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
294296
; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]

0 commit comments

Comments
 (0)