@@ -50,15 +50,22 @@ define amdgpu_kernel void @load_global_v3i64(ptr addrspace(1) %dst, ptr addrspac
5050define amdgpu_kernel void @load_global_v3i64_invariant (ptr addrspace (1 ) %dst , ptr addrspace (1 ) %src ) #0 {
5151; CHECK-LABEL: load_global_v3i64_invariant:
5252; CHECK: ; %bb.0:
53- ; CHECK-NEXT: v_mov_b32_e32 v6 , 0
54- ; CHECK-NEXT: s_load_dwordx2 s[0:1 ], s[8:9], 0x0
55- ; CHECK-NEXT: s_load_dwordx2 s[2:3 ], s[8:9], 0x8
53+ ; CHECK-NEXT: v_mov_b32_e32 v4 , 0
54+ ; CHECK-NEXT: s_load_dwordx2 s[4:5 ], s[8:9], 0x0
55+ ; CHECK-NEXT: s_load_dwordx2 s[6:7 ], s[8:9], 0x8
5656; CHECK-NEXT: s_waitcnt lgkmcnt(0)
57- ; CHECK-NEXT: global_load_dwordx4 v[0:3], v6, s[2:3]
58- ; CHECK-NEXT: global_load_dwordx2 v[4:5], v6, s[2:3] offset:16
59- ; CHECK-NEXT: s_waitcnt vmcnt(0)
60- ; CHECK-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
61- ; CHECK-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
57+ ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
58+ ; CHECK-NEXT: s_nop 0
59+ ; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x10
60+ ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
61+ ; CHECK-NEXT: v_mov_b32_e32 v0, s6
62+ ; CHECK-NEXT: v_mov_b32_e32 v1, s7
63+ ; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] offset:16
64+ ; CHECK-NEXT: v_mov_b32_e32 v0, s0
65+ ; CHECK-NEXT: v_mov_b32_e32 v1, s1
66+ ; CHECK-NEXT: v_mov_b32_e32 v2, s2
67+ ; CHECK-NEXT: v_mov_b32_e32 v3, s3
68+ ; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
6269; CHECK-NEXT: s_endpgm
6370 %ld = load <3 x i64 >, ptr addrspace (1 ) %src , align 32 , !invariant.load !0
6471 store <3 x i64 > %ld , ptr addrspace (1 ) %dst , align 32
0 commit comments