@@ -17,8 +17,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
1717; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
1818; SI-NEXT: s_waitcnt vmcnt(0)
1919; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
20- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
21- ; SI-NEXT: v_bfe_u32 v2, v2, 0, v3
20+ ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
21+ ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
22+ ; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2
2223; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
2324; SI-NEXT: s_endpgm
2425;
@@ -37,8 +38,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
3738; VI-NEXT: v_mov_b32_e32 v1, s1
3839; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
3940; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
40- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
41- ; VI-NEXT: v_bfe_u32 v2, v3, 0, v2
41+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
42+ ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
43+ ; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
4244; VI-NEXT: flat_store_dword v[0:1], v2
4345; VI-NEXT: s_endpgm
4446 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -47,8 +49,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
4749 %out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
4850 %src = load volatile i32 , ptr addrspace (1 ) %in0.gep
4951 %width = load volatile i32 , ptr addrspace (1 ) %in0.gep
50- %width5 = and i32 %width , 31
51- %sub = sub i32 32 , %width5
52+ %sub = sub i32 32 , %width
5253 %shl = shl i32 %src , %sub
5354 %bfe = lshr i32 %shl , %sub
5455 store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -71,7 +72,6 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
7172; SI-NEXT: s_waitcnt vmcnt(0)
7273; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
7374; SI-NEXT: s_mov_b32 s6, -1
74- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
7575; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
7676; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
7777; SI-NEXT: v_lshrrev_b32_e32 v3, v3, v2
@@ -95,8 +95,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
9595; VI-NEXT: v_mov_b32_e32 v1, s1
9696; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
9797; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
98- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
99- ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v2
98+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
10099; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
101100; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3
102101; VI-NEXT: flat_store_dword v[0:1], v2
@@ -109,8 +108,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
109108 %out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
110109 %src = load volatile i32 , ptr addrspace (1 ) %in0.gep
111110 %width = load volatile i32 , ptr addrspace (1 ) %in0.gep
112- %width5 = and i32 %width , 31
113- %sub = sub i32 32 , %width5
111+ %sub = sub i32 32 , %width
114112 %shl = shl i32 %src , %sub
115113 %bfe = lshr i32 %shl , %sub
116114 store i32 %bfe , ptr addrspace (1 ) %out.gep
@@ -221,8 +219,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
221219; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc
222220; SI-NEXT: s_waitcnt vmcnt(0)
223221; SI-NEXT: s_mov_b64 s[2:3], s[6:7]
224- ; SI-NEXT: v_and_b32_e32 v3, 31, v3
225- ; SI-NEXT: v_bfe_i32 v2, v2, 0, v3
222+ ; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3
223+ ; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2
224+ ; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2
226225; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
227226; SI-NEXT: s_endpgm
228227;
@@ -241,8 +240,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
241240; VI-NEXT: v_mov_b32_e32 v1, s1
242241; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
243242; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
244- ; VI-NEXT: v_and_b32_e32 v2, 31, v4
245- ; VI-NEXT: v_bfe_i32 v2, v3, 0, v2
243+ ; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
244+ ; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
245+ ; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3
246246; VI-NEXT: flat_store_dword v[0:1], v2
247247; VI-NEXT: s_endpgm
248248 %id.x = tail call i32 @llvm.amdgcn.workitem.id.x ()
@@ -251,8 +251,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
251251 %out.gep = getelementptr i32 , ptr addrspace (1 ) %out , i32 %id.x
252252 %src = load volatile i32 , ptr addrspace (1 ) %in0.gep
253253 %width = load volatile i32 , ptr addrspace (1 ) %in0.gep
254- %width5 = and i32 %width , 31
255- %sub = sub i32 32 , %width5
254+ %sub = sub i32 32 , %width
256255 %shl = shl i32 %src , %sub
257256 %bfe = ashr i32 %shl , %sub
258257 store i32 %bfe , ptr addrspace (1 ) %out.gep
0 commit comments