Skip to content

Commit 4b41adf

Browse files
committed
Comments
1 parent 960c900 commit 4b41adf

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,8 @@ class WaitcntBrackets {
785785
auto STy = getSgprScoresIdx(T);
786786
for (MCRegUnit RU : regunits(Reg))
787787
SGPRs[RU].Scores[STy] = Val;
788+
} else {
789+
llvm_unreachable("Register cannot be tracked/unknown register!");
788790
}
789791
}
790792

@@ -808,7 +810,7 @@ class WaitcntBrackets {
808810
// The score tracking logic is fragmented as follows:
809811
// - VMem: VGPR RegUnits and LDS DMA IDs, see the VMEMID encoding.
810812
// - SGPRs: SGPR RegUnits
811-
// - SCC
813+
// - SCC: Non-allocatable and not general purpose, so it is not an SGPR.
812814
//
813815
// For the VMem case, if the key is within the range of LDS DMA IDs,
814816
// then the corresponding index into the `LDSDMAStores` vector below is:

llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,6 @@ main_body:
156156
ret void
157157
}
158158

159-
; There are 8 pseudo registers defined to track LDS DMA dependencies.
160-
161159
define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, ptr addrspace(1) %out) {
162160
; GFX9-LABEL: buffer_load_lds_dword_10_arrays:
163161
; GFX9: ; %bb.0: ; %main_body
@@ -226,6 +224,7 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
226224
; GFX9-NEXT: s_waitcnt vmcnt(0)
227225
; GFX9-NEXT: ds_read_b32 v8, v9 offset:2048
228226
; GFX9-NEXT: ; wave barrier
227+
; GFX9-NEXT: s_waitcnt vmcnt(0)
229228
; GFX9-NEXT: ds_read_b32 v9, v9 offset:2304
230229
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
231230
; GFX9-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -292,6 +291,7 @@ define amdgpu_kernel void @buffer_load_lds_dword_10_arrays(<4 x i32> %rsrc, i32
292291
; GFX10-NEXT: s_waitcnt vmcnt(0)
293292
; GFX10-NEXT: ds_read_b32 v8, v9 offset:2048
294293
; GFX10-NEXT: ; wave barrier
294+
; GFX10-NEXT: s_waitcnt vmcnt(0)
295295
; GFX10-NEXT: ds_read_b32 v9, v9 offset:2304
296296
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
297297
; GFX10-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]

0 commit comments

Comments
 (0)