@@ -20,8 +20,6 @@ define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr
2020; CHECK-NEXT: v_mov_b32_e32 v0, 0
2121; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
2222; CHECK-NEXT: s_waitcnt vmcnt(0)
23- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
24- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
2523; CHECK-NEXT: ; return to shader part epilog
2624 %load = load volatile float , ptr addrspace (1 ) %ptr
2725 ret float %load
@@ -33,8 +31,6 @@ define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) i
3331; CHECK-NEXT: v_mov_b32_e32 v0, 0
3432; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc
3533; CHECK-NEXT: s_waitcnt vmcnt(0)
36- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
37- ; CHECK-NEXT: v_mov_b32_e32 v1, s0
3834; CHECK-NEXT: global_store_dword v0, v1, s[2:3]
3935; CHECK-NEXT: s_endpgm
4036 %load = load volatile <2 x i16 >, ptr addrspace (1 ) %ptr0
@@ -49,8 +45,6 @@ define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1)
4945; CHECK-NEXT: v_mov_b32_e32 v0, 0
5046; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc
5147; CHECK-NEXT: s_waitcnt vmcnt(0)
52- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
53- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
5448; CHECK-NEXT: ; return to shader part epilog
5549 %load = load volatile <2 x i16 >, ptr addrspace (1 ) %ptr0
5650 %bitcast = bitcast <2 x i16 > %load to float
@@ -63,10 +57,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1
6357; CHECK-NEXT: v_mov_b32_e32 v2, 0
6458; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
6559; CHECK-NEXT: s_waitcnt vmcnt(0)
66- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
67- ; CHECK-NEXT: v_readfirstlane_b32 s1, v1
68- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
69- ; CHECK-NEXT: v_mov_b32_e32 v1, s1
7060; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
7161; CHECK-NEXT: s_endpgm
7262 %load = load volatile i64 , ptr addrspace (1 ) %ptr0
@@ -85,10 +75,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr add
8575; CHECK-NEXT: v_mov_b32_e32 v2, 0
8676; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
8777; CHECK-NEXT: s_waitcnt vmcnt(0)
88- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
89- ; CHECK-NEXT: v_readfirstlane_b32 s1, v1
90- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
91- ; CHECK-NEXT: v_mov_b32_e32 v1, s1
9278; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
9379; CHECK-NEXT: s_endpgm
9480 %load = load volatile <2 x i32 >, ptr addrspace (1 ) %ptr0
@@ -109,9 +95,7 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr add
10995; CHECK-NEXT: v_mov_b32_e32 v2, 0
11096; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
11197; CHECK-NEXT: s_waitcnt vmcnt(0)
112- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
113- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
114- ; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
98+ ; CHECK-NEXT: global_store_dword v2, v1, s[2:3]
11599; CHECK-NEXT: s_endpgm
116100 %load = load volatile <2 x i32 >, ptr addrspace (1 ) %ptr0
117101 %extracted = extractelement <2 x i32 > %load , i32 1
@@ -125,8 +109,7 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr a
125109; CHECK-NEXT: v_mov_b32_e32 v0, 0
126110; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
127111; CHECK-NEXT: s_waitcnt vmcnt(0)
128- ; CHECK-NEXT: v_readfirstlane_b32 s0, v1
129- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
112+ ; CHECK-NEXT: v_mov_b32_e32 v0, v1
130113; CHECK-NEXT: ; return to shader part epilog
131114 %load = load volatile <2 x float >, ptr addrspace (1 ) %ptr0
132115 %extracted = extractelement <2 x float > %load , i32 1
@@ -139,8 +122,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr
139122; CHECK-NEXT: v_mov_b32_e32 v2, 0
140123; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc
141124; CHECK-NEXT: s_waitcnt vmcnt(0)
142- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
143- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
144125; CHECK-NEXT: global_store_dword v2, v0, s[2:3]
145126; CHECK-NEXT: s_endpgm
146127 %load = load volatile <4 x i16 >, ptr addrspace (1 ) %ptr0
@@ -156,8 +137,6 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vg
156137; CHECK-NEXT: v_mov_b32_e32 v0, 0
157138; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc
158139; CHECK-NEXT: s_waitcnt vmcnt(0)
159- ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
160- ; CHECK-NEXT: v_mov_b32_e32 v0, s0
161140; CHECK-NEXT: ; return to shader part epilog
162141 %load = load volatile <4 x i16 >, ptr addrspace (1 ) %ptr0
163142 %extracted = shufflevector <4 x i16 > %load , <4 x i16 > %load , <2 x i32 > <i32 0 , i32 1 >
0 commit comments