@@ -12,9 +12,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
1212; GFX8V4: ; %bb.0:
1313; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
1414; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40
15- ; GFX8V4-NEXT: s_add_i32 s12, s12, s17
16- ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
17- ; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
15+ ; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
1816; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
1917; GFX8V4-NEXT: s_mov_b32 s4, s0
2018; GFX8V4-NEXT: s_mov_b32 s5, s3
@@ -25,7 +23,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
2523; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
2624; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
2725; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
28- ; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
2926; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
3027; GFX8V4-NEXT: flat_store_dword v[0:1], v2
3128; GFX8V4-NEXT: s_waitcnt vmcnt(0)
@@ -40,9 +37,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
4037; GFX8V5: ; %bb.0:
4138; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
4239; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0xc8
43- ; GFX8V5-NEXT: s_add_i32 s10, s10, s15
44- ; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
45- ; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
40+ ; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
4641; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
4742; GFX8V5-NEXT: s_mov_b32 s4, s0
4843; GFX8V5-NEXT: s_mov_b32 s5, s2
@@ -52,7 +47,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
5247; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
5348; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
5449; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
55- ; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
5650; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
5751; GFX8V5-NEXT: flat_store_dword v[0:1], v2
5852; GFX8V5-NEXT: s_waitcnt vmcnt(0)
@@ -66,10 +60,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
6660; GFX9V4-LABEL: addrspacecast:
6761; GFX9V4: ; %bb.0:
6862; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
69- ; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s12, s17
70- ; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
7163; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
7264; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
65+ ; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
7366; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
7467; GFX9V4-NEXT: s_mov_b32 s2, s0
7568; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
@@ -78,7 +71,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
7871; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
7972; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
8073; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
81- ; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
8274; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
8375; GFX9V4-NEXT: flat_store_dword v[0:1], v2
8476; GFX9V4-NEXT: s_waitcnt vmcnt(0)
@@ -92,10 +84,9 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
9284; GFX9V5-LABEL: addrspacecast:
9385; GFX9V5: ; %bb.0:
9486; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
95- ; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s10, s15
96- ; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
9787; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
9888; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
89+ ; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
9990; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
10091; GFX9V5-NEXT: s_mov_b32 s2, s0
10192; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
@@ -104,7 +95,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
10495; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
10596; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
10697; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
107- ; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
10898; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
10999; GFX9V5-NEXT: flat_store_dword v[0:1], v2
110100; GFX9V5-NEXT: s_waitcnt vmcnt(0)
@@ -127,9 +117,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
127117; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
128118; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
129119; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x40
130- ; GFX8V4-NEXT: s_add_i32 s12, s12, s17
131- ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
132- ; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
133120; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
134121; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
135122; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
@@ -143,9 +130,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
143130; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
144131; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
145132; GFX8V5-NEXT: s_load_dword s0, s[6:7], 0xcc
146- ; GFX8V5-NEXT: s_add_i32 s10, s10, s15
147- ; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
148- ; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
149133; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
150134; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
151135; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
@@ -189,9 +173,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
189173; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
190174; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
191175; GFX8V4-NEXT: s_load_dword s0, s[6:7], 0x44
192- ; GFX8V4-NEXT: s_add_i32 s12, s12, s17
193- ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
194- ; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
195176; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
196177; GFX8V4-NEXT: s_cmp_eq_u32 s1, s0
197178; GFX8V4-NEXT: s_cselect_b32 s0, 1, 0
@@ -205,9 +186,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
205186; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
206187; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
207188; GFX8V5-NEXT: s_load_dword s0, s[6:7], 0xc8
208- ; GFX8V5-NEXT: s_add_i32 s10, s10, s15
209- ; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
210- ; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
211189; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
212190; GFX8V5-NEXT: s_cmp_eq_u32 s1, s0
213191; GFX8V5-NEXT: s_cselect_b32 s0, 1, 0
@@ -291,10 +269,7 @@ define amdgpu_kernel void @llvm_debugtrap() {
291269define amdgpu_kernel void @llvm_amdgcn_queue_ptr (ptr addrspace (1 ) %ptr ) {
292270; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
293271; GFX8V4: ; %bb.0:
294- ; GFX8V4-NEXT: s_add_i32 s12, s12, s17
295272; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
296- ; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s13
297- ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
298273; GFX8V4-NEXT: v_mov_b32_e32 v1, s7
299274; GFX8V4-NEXT: s_add_u32 s0, s8, 8
300275; GFX8V4-NEXT: flat_load_ubyte v0, v[0:1] glc
@@ -320,9 +295,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
320295;
321296; GFX8V5-LABEL: llvm_amdgcn_queue_ptr:
322297; GFX8V5: ; %bb.0:
323- ; GFX8V5-NEXT: s_add_i32 s10, s10, s15
324- ; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s11
325- ; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s10, 8
326298; GFX8V5-NEXT: s_add_u32 s0, s6, 8
327299; GFX8V5-NEXT: flat_load_ubyte v0, v[0:1] glc
328300; GFX8V5-NEXT: s_addc_u32 s1, s7, 0
0 commit comments