@@ -258,46 +258,41 @@ define amdgpu_kernel void @add_x_shl_max_offset() #1 {
258258define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt () #1 {
259259; CI-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
260260; CI: ; %bb.0:
261- ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
262- ; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0
261+ ; CI-NEXT: v_mul_i32_i24_e32 v0, -4, v0
263262; CI-NEXT: v_mov_b32_e32 v1, 13
264263; CI-NEXT: s_mov_b32 m0, -1
265- ; CI-NEXT: ds_write_b8 v0, v1
264+ ; CI-NEXT: ds_write_b8 v0, v1 offset:65535
266265; CI-NEXT: s_endpgm
267266;
268267; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
269268; GFX9: ; %bb.0:
270- ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
271- ; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0
269+ ; GFX9-NEXT: v_mul_i32_i24_e32 v0, -4, v0
272270; GFX9-NEXT: v_mov_b32_e32 v1, 13
273- ; GFX9-NEXT: ds_write_b8 v0, v1
271+ ; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535
274272; GFX9-NEXT: s_endpgm
275273;
276274; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
277275; GFX10: ; %bb.0:
278- ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
276+ ; GFX10-NEXT: v_mul_i32_i24_e32 v0, -4, v0
279277; GFX10-NEXT: v_mov_b32_e32 v1, 13
280- ; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0
281- ; GFX10-NEXT: ds_write_b8 v0, v1
278+ ; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535
282279; GFX10-NEXT: s_endpgm
283280;
284281; GFX11-TRUE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
285282; GFX11-TRUE16: ; %bb.0:
286283; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
287- ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
288- ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
289- ; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0xffff, v0
284+ ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
285+ ; GFX11-TRUE16-NEXT: v_mul_i32_i24_e32 v1, -4, v0
290286; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, 13
291- ; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0
287+ ; GFX11-TRUE16-NEXT: ds_store_b8 v1, v0 offset:65535
292288; GFX11-TRUE16-NEXT: s_endpgm
293289;
294290; GFX11-FAKE16-LABEL: add_x_shl_neg_to_sub_max_offset_alt:
295291; GFX11-FAKE16: ; %bb.0:
296292; GFX11-FAKE16-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
297- ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
298- ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
299- ; GFX11-FAKE16-NEXT: v_xor_b32_e32 v0, 0xffff, v0
300- ; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1
293+ ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
294+ ; GFX11-FAKE16-NEXT: v_mul_i32_i24_e32 v0, -4, v0
295+ ; GFX11-FAKE16-NEXT: ds_store_b8 v0, v1 offset:65535
301296; GFX11-FAKE16-NEXT: s_endpgm
302297 %x.i = tail call i32 @llvm.amdgcn.workitem.id.x ()
303298 %.neg = mul i32 %x.i , -4
@@ -447,9 +442,9 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
447442;
448443; GFX11-LABEL: add_x_shl_neg_to_sub_multi_use:
449444; GFX11: ; %bb.0:
450- ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0
445+ ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_and_b32 v0, 0x3ff, v0
451446; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
452- ; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0
447+ ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
453448; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0
454449; GFX11-NEXT: ds_store_b32 v0, v1 offset:123
455450; GFX11-NEXT: ds_store_b32 v0, v1 offset:456
0 commit comments