@@ -565,23 +565,22 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
565565;
566566; GFX11-LABEL: srem32_invariant_denom:
567567; GFX11: ; %bb.0: ; %bb
568- ; GFX11-NEXT: s_clause 0x1
569- ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
570- ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
568+ ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
571569; GFX11-NEXT: s_waitcnt lgkmcnt(0)
572- ; GFX11-NEXT: s_abs_i32 s2, s2
573- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
570+ ; GFX11-NEXT: s_abs_i32 s2, s0
571+ ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
574572; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
575573; GFX11-NEXT: s_sub_i32 s3, 0, s2
574+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
576575; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
577576; GFX11-NEXT: s_waitcnt_depctr 0xfff
578577; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
579- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
580578; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
579+ ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
581580; GFX11-NEXT: v_readfirstlane_b32 s4, v0
582581; GFX11-NEXT: v_mov_b32_e32 v0, 0
583- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
584582; GFX11-NEXT: s_mul_i32 s3, s3, s4
583+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
585584; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
586585; GFX11-NEXT: s_mov_b32 s3, 0
587586; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -602,6 +601,7 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
602601; GFX11-NEXT: s_cselect_b32 s5, s6, s5
603602; GFX11-NEXT: s_add_i32 s3, s3, 1
604603; GFX11-NEXT: v_mov_b32_e32 v1, s5
604+ ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
605605; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
606606; GFX11-NEXT: s_add_u32 s0, s0, 4
607607; GFX11-NEXT: s_addc_u32 s1, s1, 0
0 commit comments