@@ -3851,9 +3851,9 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
3851
3851
; VI-DS128-NEXT: v_and_b32_e32 v32, 0xffff, v24
3852
3852
; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
3853
3853
; VI-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
3854
- ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11
3854
+ ; VI-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v11
3855
3855
; VI-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
3856
- ; VI-DS128-NEXT: v_mov_b32_e32 v31, v15
3856
+ ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
3857
3857
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
3858
3858
; VI-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
3859
3859
; VI-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
@@ -3864,17 +3864,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
3864
3864
; VI-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
3865
3865
; VI-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
3866
3866
; VI-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
3867
- ; VI-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
3868
3867
; VI-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
3869
3868
; VI-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
3870
3869
; VI-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
3870
+ ; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
3871
3871
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
3872
3872
; VI-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
3873
3873
; VI-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
3874
3874
; VI-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
3875
3875
; VI-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
3876
3876
; VI-DS128-NEXT: v_mov_b32_e32 v24, s0
3877
- ; VI-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
3878
3877
; VI-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
3879
3878
; VI-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
3880
3879
; VI-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
@@ -3944,7 +3943,7 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
3944
3943
; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
3945
3944
; GFX9-DS128-NEXT: ds_read_b128 v[20:23], v0 offset:32
3946
3945
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
3947
- ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v11
3946
+ ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v31, 16, v11
3948
3947
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
3949
3948
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v4, 16, v19
3950
3949
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v2, 16, v18
@@ -3992,8 +3991,8 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
3992
3991
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:80
3993
3992
; GFX9-DS128-NEXT: ds_read_b128 v[55:58], v0 offset:96
3994
3993
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v29, 16, v10
3995
- ; GFX9-DS128-NEXT: v_mov_b32_e32 v31, v15
3996
3994
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v15, 16, v9
3995
+ ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
3997
3996
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
3998
3997
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v50, 16, v27
3999
3998
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v48, 16, v26
@@ -4004,17 +4003,16 @@ define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(ptr addrspace(3) %out
4004
4003
; GFX9-DS128-NEXT: v_and_b32_e32 v53, 0xffff, v25
4005
4004
; GFX9-DS128-NEXT: v_and_b32_e32 v51, 0xffff, v24
4006
4005
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v0 offset:112
4007
- ; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v13, 16, v8
4008
4006
; GFX9-DS128-NEXT: v_and_b32_e32 v30, 0xffff, v11
4009
4007
; GFX9-DS128-NEXT: v_and_b32_e32 v28, 0xffff, v10
4010
4008
; GFX9-DS128-NEXT: v_and_b32_e32 v14, 0xffff, v9
4009
+ ; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
4011
4010
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
4012
4011
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v3, 16, v25
4013
4012
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v1, 16, v24
4014
4013
; GFX9-DS128-NEXT: v_and_b32_e32 v2, 0xffff, v25
4015
4014
; GFX9-DS128-NEXT: v_and_b32_e32 v0, 0xffff, v24
4016
4015
; GFX9-DS128-NEXT: v_mov_b32_e32 v24, s0
4017
- ; GFX9-DS128-NEXT: v_and_b32_e32 v12, 0xffff, v8
4018
4016
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v42, 16, v39
4019
4017
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v40, 16, v38
4020
4018
; GFX9-DS128-NEXT: v_lshrrev_b32_e32 v46, 16, v37
@@ -4890,7 +4888,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
4890
4888
; VI-DS128-NEXT: s_waitcnt lgkmcnt(2)
4891
4889
; VI-DS128-NEXT: v_ashrrev_i32_e32 v53, 16, v40
4892
4890
; VI-DS128-NEXT: v_bfe_i32 v52, v40, 0, 16
4893
- ; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11
4891
+ ; VI-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v11
4894
4892
; VI-DS128-NEXT: s_waitcnt lgkmcnt(1)
4895
4893
; VI-DS128-NEXT: v_ashrrev_i32_e32 v47, 16, v39
4896
4894
; VI-DS128-NEXT: v_ashrrev_i32_e32 v45, 16, v38
@@ -4901,14 +4899,13 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
4901
4899
; VI-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
4902
4900
; VI-DS128-NEXT: v_mov_b32_e32 v32, s0
4903
4901
; VI-DS128-NEXT: v_ashrrev_i32_e32 v21, 16, v10
4904
- ; VI-DS128-NEXT: v_mov_b32_e32 v23, v15
4905
4902
; VI-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
4903
+ ; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
4906
4904
; VI-DS128-NEXT: s_waitcnt lgkmcnt(0)
4907
4905
; VI-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
4908
4906
; VI-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
4909
4907
; VI-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
4910
4908
; VI-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
4911
- ; VI-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
4912
4909
; VI-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
4913
4910
; VI-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
4914
4911
; VI-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
@@ -4986,7 +4983,7 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
4986
4983
; GFX9-DS128-NEXT: s_addc_u32 s13, s13, 0
4987
4984
; GFX9-DS128-NEXT: ds_read_b128 v[24:27], v32 offset:32
4988
4985
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(2)
4989
- ; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v11
4986
+ ; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v23, 16, v11
4990
4987
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(1)
4991
4988
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v19
4992
4989
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v18
@@ -5031,15 +5028,14 @@ define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(ptr addrspace(3) %out
5031
5028
; GFX9-DS128-NEXT: v_bfe_i32 v50, v37, 0, 16
5032
5029
; GFX9-DS128-NEXT: ds_read_b128 v[37:40], v32 offset:112
5033
5030
; GFX9-DS128-NEXT: v_mov_b32_e32 v32, s0
5034
- ; GFX9-DS128-NEXT: v_mov_b32_e32 v23, v15
5035
5031
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v15, 16, v9
5036
5032
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v13, 16, v8
5033
+ ; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
5037
5034
; GFX9-DS128-NEXT: s_waitcnt lgkmcnt(0)
5038
5035
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v3, 16, v38
5039
5036
; GFX9-DS128-NEXT: v_ashrrev_i32_e32 v1, 16, v37
5040
5037
; GFX9-DS128-NEXT: v_bfe_i32 v2, v38, 0, 16
5041
5038
; GFX9-DS128-NEXT: v_bfe_i32 v0, v37, 0, 16
5042
- ; GFX9-DS128-NEXT: v_bfe_i32 v22, v11, 0, 16
5043
5039
; GFX9-DS128-NEXT: v_bfe_i32 v20, v10, 0, 16
5044
5040
; GFX9-DS128-NEXT: v_bfe_i32 v14, v9, 0, 16
5045
5041
; GFX9-DS128-NEXT: v_bfe_i32 v12, v8, 0, 16
0 commit comments