1010define amdgpu_kernel void @addrspacecast (ptr addrspace (5 ) %ptr.private , ptr addrspace (3 ) %ptr.local ) {
1111; GFX8V4-LABEL: addrspacecast:
1212; GFX8V4: ; %bb.0:
13- ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
14- ; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x40
15- ; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
13+ ; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
14+ ; GFX8V4-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
15+ ; GFX8V4-NEXT: s_add_i32 s8, s8, s11
16+ ; GFX8V4-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8
17+ ; GFX8V4-NEXT: s_mov_b32 flat_scratch_lo, s9
1618; GFX8V4-NEXT: s_waitcnt lgkmcnt(0)
1719; GFX8V4-NEXT: s_mov_b32 s4, s0
1820; GFX8V4-NEXT: s_mov_b32 s5, s3
@@ -23,6 +25,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
2325; GFX8V4-NEXT: s_cmp_lg_u32 s1, -1
2426; GFX8V4-NEXT: v_mov_b32_e32 v0, s4
2527; GFX8V4-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
28+ ; GFX8V4-NEXT: v_mov_b32_e32 v2, 1
2629; GFX8V4-NEXT: v_mov_b32_e32 v1, s5
2730; GFX8V4-NEXT: flat_store_dword v[0:1], v2
2831; GFX8V4-NEXT: s_waitcnt vmcnt(0)
@@ -35,9 +38,11 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
3538;
3639; GFX8V5-LABEL: addrspacecast:
3740; GFX8V5: ; %bb.0:
38- ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
39- ; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0xc8
40- ; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
41+ ; GFX8V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
42+ ; GFX8V5-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xc8
43+ ; GFX8V5-NEXT: s_add_i32 s6, s6, s9
44+ ; GFX8V5-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8
45+ ; GFX8V5-NEXT: s_mov_b32 flat_scratch_lo, s7
4146; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
4247; GFX8V5-NEXT: s_mov_b32 s4, s0
4348; GFX8V5-NEXT: s_mov_b32 s5, s2
@@ -47,6 +52,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
4752; GFX8V5-NEXT: s_cmp_lg_u32 s1, -1
4853; GFX8V5-NEXT: v_mov_b32_e32 v0, s4
4954; GFX8V5-NEXT: s_cselect_b64 s[0:1], s[2:3], 0
55+ ; GFX8V5-NEXT: v_mov_b32_e32 v2, 1
5056; GFX8V5-NEXT: v_mov_b32_e32 v1, s5
5157; GFX8V5-NEXT: flat_store_dword v[0:1], v2
5258; GFX8V5-NEXT: s_waitcnt vmcnt(0)
@@ -59,10 +65,11 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
5965;
6066; GFX9V4-LABEL: addrspacecast:
6167; GFX9V4: ; %bb.0:
62- ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
68+ ; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
69+ ; GFX9V4-NEXT: s_add_u32 flat_scratch_lo, s6, s9
70+ ; GFX9V4-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
6371; GFX9V4-NEXT: s_mov_b64 s[2:3], src_private_base
6472; GFX9V4-NEXT: s_mov_b64 s[4:5], src_shared_base
65- ; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
6673; GFX9V4-NEXT: s_waitcnt lgkmcnt(0)
6774; GFX9V4-NEXT: s_mov_b32 s2, s0
6875; GFX9V4-NEXT: s_cmp_lg_u32 s0, -1
@@ -71,6 +78,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
7178; GFX9V4-NEXT: s_cmp_lg_u32 s1, -1
7279; GFX9V4-NEXT: v_mov_b32_e32 v0, s2
7380; GFX9V4-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
81+ ; GFX9V4-NEXT: v_mov_b32_e32 v2, 1
7482; GFX9V4-NEXT: v_mov_b32_e32 v1, s3
7583; GFX9V4-NEXT: flat_store_dword v[0:1], v2
7684; GFX9V4-NEXT: s_waitcnt vmcnt(0)
@@ -83,10 +91,11 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
8391;
8492; GFX9V5-LABEL: addrspacecast:
8593; GFX9V5: ; %bb.0:
86- ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
94+ ; GFX9V5-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
95+ ; GFX9V5-NEXT: s_add_u32 flat_scratch_lo, s6, s9
96+ ; GFX9V5-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
8797; GFX9V5-NEXT: s_mov_b64 s[2:3], src_private_base
8898; GFX9V5-NEXT: s_mov_b64 s[4:5], src_shared_base
89- ; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
9099; GFX9V5-NEXT: s_waitcnt lgkmcnt(0)
91100; GFX9V5-NEXT: s_mov_b32 s2, s0
92101; GFX9V5-NEXT: s_cmp_lg_u32 s0, -1
@@ -95,6 +104,7 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
95104; GFX9V5-NEXT: s_cmp_lg_u32 s1, -1
96105; GFX9V5-NEXT: v_mov_b32_e32 v0, s2
97106; GFX9V5-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
107+ ; GFX9V5-NEXT: v_mov_b32_e32 v2, 1
98108; GFX9V5-NEXT: v_mov_b32_e32 v1, s3
99109; GFX9V5-NEXT: flat_store_dword v[0:1], v2
100110; GFX9V5-NEXT: s_waitcnt vmcnt(0)
0 commit comments