@@ -8,28 +8,46 @@ define amdgpu_kernel void @test_waitcnt(ptr addrspace(1) %global_buffer, ptr add
88; This test checks that the SIInsertWaitcnts pass inserts S_WAITCNT VMCNT(0) before DS_READ
99; CHECK-LABEL: test_waitcnt:
1010; CHECK: ; %bb.0: ; %entry
11- ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
12- ; CHECK-NEXT: v_mov_b32_e32 v0, 0
11+ ; CHECK-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x0
12+ ; CHECK-NEXT: s_mov_b32 s12, s8
13+ ; CHECK-NEXT: s_mov_b32 s13, s9
14+ ; CHECK-NEXT: s_mov_b32 s14, s10
15+ ; CHECK-NEXT: s_mov_b64 s[10:11], s[6:7]
1316; CHECK-NEXT: s_waitcnt lgkmcnt(0)
14- ; CHECK-NEXT: s_add_u32 s4, s0, 64
15- ; CHECK-NEXT: s_addc_u32 s5, s1, 0
16- ; CHECK-NEXT: s_mov_b32 m0, s2
17- ; CHECK-NEXT: s_nop 0
18- ; CHECK-NEXT: global_load_lds_dword v0, s[4:5] offset:4
19- ; CHECK-NEXT: s_load_dword s4, s[0:1], 0x0
17+ ; CHECK-NEXT: s_add_u32 s15, s36, 64
18+ ; CHECK-NEXT: s_addc_u32 s18, s37, 0
19+ ; CHECK-NEXT: s_add_u32 s8, s4, 16
20+ ; CHECK-NEXT: s_addc_u32 s9, s5, 0
21+ ; CHECK-NEXT: s_load_dword s6, s[36:37], 0x0
22+ ; CHECK-NEXT: s_getpc_b64 s[4:5]
23+ ; CHECK-NEXT: s_add_u32 s4, s4, llvm.amdgcn.load.to.lds@gotpcrel32@lo+4
24+ ; CHECK-NEXT: s_addc_u32 s5, s5, llvm.amdgcn.load.to.lds@gotpcrel32@hi+12
25+ ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
26+ ; CHECK-NEXT: v_mov_b32_e32 v40, 0
27+ ; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1]
2028; CHECK-NEXT: s_waitcnt lgkmcnt(0)
21- ; CHECK-NEXT: v_mov_b32_e32 v3, s4
22- ; CHECK-NEXT: global_store_dword v0, v3, s[0:1] offset:64
29+ ; CHECK-NEXT: v_mov_b32_e32 v41, s6
30+ ; CHECK-NEXT: s_mov_b64 s[6:7], s[2:3]
31+ ; CHECK-NEXT: v_mov_b32_e32 v31, v0
32+ ; CHECK-NEXT: v_mov_b32_e32 v0, s15
33+ ; CHECK-NEXT: v_mov_b32_e32 v1, s18
34+ ; CHECK-NEXT: v_mov_b32_e32 v2, s38
35+ ; CHECK-NEXT: v_mov_b32_e32 v3, 4
36+ ; CHECK-NEXT: v_mov_b32_e32 v4, 4
37+ ; CHECK-NEXT: v_mov_b32_e32 v5, 0
38+ ; CHECK-NEXT: s_mov_b32 s32, 0
39+ ; CHECK-NEXT: global_store_dword v40, v41, s[36:37] offset:64
40+ ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
2341; CHECK-NEXT: ; sched_barrier mask(0x00000000)
24- ; CHECK-NEXT: v_mov_b32_e32 v1, s2
25- ; CHECK-NEXT: v_mov_b32_e32 v2, s3
26- ; CHECK-NEXT: ds_write_b32 v1, v3
27- ; CHECK-NEXT: ds_write_b32 v2, v3
42+ ; CHECK-NEXT: v_mov_b32_e32 v0, s38
43+ ; CHECK-NEXT: v_mov_b32_e32 v1, s39
44+ ; CHECK-NEXT: ds_write_b32 v0, v41
45+ ; CHECK-NEXT: ds_write_b32 v1, v41
2846; CHECK-NEXT: ; sched_barrier mask(0x00000000)
29- ; CHECK-NEXT: ds_read_b32 v1, v1
47+ ; CHECK-NEXT: ds_read_b32 v0, v0
3048; CHECK-NEXT: s_waitcnt lgkmcnt(0)
31- ; CHECK-NEXT: global_store_dword v0, v1 , s[0:1 ] offset:16
32- ; CHECK-NEXT: global_store_dword v0, v3 , s[0:1 ] offset:32
49+ ; CHECK-NEXT: global_store_dword v40, v0 , s[36:37 ] offset:16
50+ ; CHECK-NEXT: global_store_dword v40, v41 , s[36:37 ] offset:32
3351; CHECK-NEXT: s_endpgm
3452entry:
3553 ; VMEM accesses with alias.scope
0 commit comments