Skip to content

Commit daf4723

Browse files
committed
Reintroduce missing tests and fix existing one
1 parent 74bf930 commit daf4723

File tree

2 files changed

+1322
-61
lines changed

2 files changed

+1322
-61
lines changed

llvm/test/CodeGen/AMDGPU/idiv-licm.ll

Lines changed: 62 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -565,22 +565,23 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
565565
;
566566
; GFX11-LABEL: srem32_invariant_denom:
567567
; GFX11: ; %bb.0: ; %bb
568-
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
569-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
570-
; GFX11-NEXT: s_abs_i32 s2, s0
568+
; GFX11-NEXT: s_clause 0x1
569+
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c
571570
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
571+
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
572+
; GFX11-NEXT: s_abs_i32 s2, s2
573+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
572574
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
573575
; GFX11-NEXT: s_sub_i32 s3, 0, s2
574-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
575576
; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
576577
; GFX11-NEXT: s_waitcnt_depctr 0xfff
577578
; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
579+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
578580
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
579-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
580581
; GFX11-NEXT: v_readfirstlane_b32 s4, v0
581582
; GFX11-NEXT: v_mov_b32_e32 v0, 0
583+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
582584
; GFX11-NEXT: s_mul_i32 s3, s3, s4
583-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
584585
; GFX11-NEXT: s_mul_hi_u32 s5, s4, s3
585586
; GFX11-NEXT: s_mov_b32 s3, 0
586587
; GFX11-NEXT: s_add_i32 s4, s4, s5
@@ -601,7 +602,6 @@ define amdgpu_kernel void @srem32_invariant_denom(ptr addrspace(1) nocapture %ar
601602
; GFX11-NEXT: s_cselect_b32 s5, s6, s5
602603
; GFX11-NEXT: s_add_i32 s3, s3, 1
603604
; GFX11-NEXT: v_mov_b32_e32 v1, s5
604-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
605605
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
606606
; GFX11-NEXT: s_add_u32 s0, s0, 4
607607
; GFX11-NEXT: s_addc_u32 s1, s1, 0
@@ -694,31 +694,32 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar
694694
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
695695
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
696696
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
697-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
697+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
698698
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
699699
; GFX11-NEXT: s_mov_b32 s2, 0
700-
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
701700
; GFX11-NEXT: .p2align 6
702701
; GFX11-NEXT: .LBB4_1: ; %bb3
703702
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
703+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
704+
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
704705
; GFX11-NEXT: s_and_b32 s3, 0xffff, s2
705706
; GFX11-NEXT: s_add_i32 s2, s2, 1
706707
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s3
707708
; GFX11-NEXT: s_lshl_b32 s3, s3, 1
708-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
709-
; GFX11-NEXT: v_mov_b32_e32 v4, s3
709+
; GFX11-NEXT: v_mov_b32_e32 v3, s3
710710
; GFX11-NEXT: s_and_b32 s3, s2, 0xffff
711-
; GFX11-NEXT: s_waitcnt_depctr 0xfff
712-
; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
711+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
713712
; GFX11-NEXT: s_cmpk_eq_i32 s3, 0x400
714-
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
713+
; GFX11-NEXT: s_waitcnt_depctr 0xfff
714+
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
715+
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
715716
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
716-
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
717-
; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
717+
; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
718+
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
718719
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
719720
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
720-
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
721-
; GFX11-NEXT: global_store_b16 v4, v2, s[0:1]
721+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
722+
; GFX11-NEXT: global_store_b16 v3, v1, s[0:1]
722723
; GFX11-NEXT: s_cbranch_scc0 .LBB4_1
723724
; GFX11-NEXT: ; %bb.2: ; %bb2
724725
; GFX11-NEXT: s_endpgm
@@ -812,33 +813,34 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
812813
; GFX11-NEXT: s_mov_b32 s3, 0
813814
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
814815
; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
815-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
816+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
816817
; GFX11-NEXT: v_cvt_f32_u32_e32 v0, s2
817-
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
818818
; GFX11-NEXT: .p2align 6
819819
; GFX11-NEXT: .LBB5_1: ; %bb3
820820
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
821+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
822+
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
821823
; GFX11-NEXT: s_and_b32 s4, 0xffff, s3
822824
; GFX11-NEXT: s_add_i32 s3, s3, 1
823825
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s4
824826
; GFX11-NEXT: s_lshl_b32 s5, s4, 1
825827
; GFX11-NEXT: s_waitcnt_depctr 0xfff
826-
; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
828+
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
827829
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
828-
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
829-
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
830-
; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
831-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
830+
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
831+
; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
832+
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
833+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
832834
; GFX11-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, v0
833-
; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, 0, v3, vcc_lo
834-
; GFX11-NEXT: v_mov_b32_e32 v3, s5
835-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
836-
; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
837-
; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
835+
; GFX11-NEXT: v_mov_b32_e32 v2, s5
836+
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
837+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
838+
; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
839+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
838840
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
839841
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
840842
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
841-
; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
843+
; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
842844
; GFX11-NEXT: s_cbranch_scc0 .LBB5_1
843845
; GFX11-NEXT: ; %bb.2: ; %bb2
844846
; GFX11-NEXT: s_endpgm
@@ -940,38 +942,37 @@ define amdgpu_kernel void @sdiv16_invariant_denom(ptr addrspace(1) nocapture %ar
940942
; GFX11-NEXT: s_mov_b32 s3, 0
941943
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
942944
; GFX11-NEXT: s_sext_i32_i16 s2, s2
943-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
945+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
944946
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
945-
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
946947
; GFX11-NEXT: .p2align 6
947948
; GFX11-NEXT: .LBB6_1: ; %bb3
948949
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
950+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
951+
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
949952
; GFX11-NEXT: s_sext_i32_i16 s4, s3
950-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
951953
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
952954
; GFX11-NEXT: s_xor_b32 s4, s4, s2
955+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
953956
; GFX11-NEXT: s_ashr_i32 s4, s4, 30
954-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
955957
; GFX11-NEXT: s_or_b32 s4, s4, 1
956958
; GFX11-NEXT: s_waitcnt_depctr 0xfff
957-
; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
958-
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
959+
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
959960
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
960-
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
961+
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
962+
; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
963+
; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
964+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
961965
; GFX11-NEXT: v_cmp_ge_f32_e64 s5, |v2|, |v0|
962-
; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
963-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
964966
; GFX11-NEXT: s_and_b32 s5, s5, exec_lo
965967
; GFX11-NEXT: s_cselect_b32 s4, s4, 0
966968
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
967-
; GFX11-NEXT: v_add_nc_u32_e32 v2, s4, v2
968-
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
969969
; GFX11-NEXT: s_add_i32 s3, s3, 1
970-
; GFX11-NEXT: v_mov_b32_e32 v3, s5
970+
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
971+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
972+
; GFX11-NEXT: v_dual_mov_b32 v2, s5 :: v_dual_add_nc_u32 v1, s4, v1
971973
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
972-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
973974
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
974-
; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
975+
; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
975976
; GFX11-NEXT: s_cbranch_scc0 .LBB6_1
976977
; GFX11-NEXT: ; %bb.2: ; %bb2
977978
; GFX11-NEXT: s_endpgm
@@ -1077,42 +1078,42 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
10771078
; GFX11-NEXT: s_mov_b32 s3, 0
10781079
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
10791080
; GFX11-NEXT: s_sext_i32_i16 s2, s2
1080-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1081+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
10811082
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s2
1082-
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
10831083
; GFX11-NEXT: .p2align 6
10841084
; GFX11-NEXT: .LBB7_1: ; %bb3
10851085
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
1086+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
1087+
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
10861088
; GFX11-NEXT: s_sext_i32_i16 s4, s3
1087-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
10881089
; GFX11-NEXT: v_cvt_f32_i32_e32 v2, s4
10891090
; GFX11-NEXT: s_xor_b32 s5, s4, s2
1091+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
10901092
; GFX11-NEXT: s_ashr_i32 s5, s5, 30
1091-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
10921093
; GFX11-NEXT: s_or_b32 s5, s5, 1
10931094
; GFX11-NEXT: s_waitcnt_depctr 0xfff
1094-
; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1
1095-
; GFX11-NEXT: v_trunc_f32_e32 v3, v3
1095+
; GFX11-NEXT: v_mul_f32_e32 v1, v2, v1
10961096
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1097-
; GFX11-NEXT: v_fma_f32 v2, -v3, v0, v2
1097+
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
1098+
; GFX11-NEXT: v_fma_f32 v2, -v1, v0, v2
1099+
; GFX11-NEXT: v_cvt_i32_f32_e32 v1, v1
1100+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
10981101
; GFX11-NEXT: v_cmp_ge_f32_e64 s6, |v2|, |v0|
1099-
; GFX11-NEXT: v_cvt_i32_f32_e32 v2, v3
1100-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
11011102
; GFX11-NEXT: s_and_b32 s6, s6, exec_lo
11021103
; GFX11-NEXT: s_cselect_b32 s5, s5, 0
1103-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1104-
; GFX11-NEXT: v_add_nc_u32_e32 v2, s5, v2
1104+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
1105+
; GFX11-NEXT: v_add_nc_u32_e32 v1, s5, v1
11051106
; GFX11-NEXT: s_and_b32 s5, 0xffff, s3
11061107
; GFX11-NEXT: s_add_i32 s3, s3, 1
11071108
; GFX11-NEXT: s_lshl_b32 s5, s5, 1
1108-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1109-
; GFX11-NEXT: v_mul_lo_u32 v2, v2, s2
1110-
; GFX11-NEXT: v_mov_b32_e32 v3, s5
1111-
; GFX11-NEXT: v_sub_nc_u32_e32 v2, s4, v2
1109+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1110+
; GFX11-NEXT: v_mov_b32_e32 v2, s5
1111+
; GFX11-NEXT: v_mul_lo_u32 v1, v1, s2
1112+
; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
11121113
; GFX11-NEXT: s_and_b32 s4, s3, 0xffff
11131114
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
11141115
; GFX11-NEXT: s_cmpk_eq_i32 s4, 0x400
1115-
; GFX11-NEXT: global_store_b16 v3, v2, s[0:1]
1116+
; GFX11-NEXT: global_store_b16 v2, v1, s[0:1]
11161117
; GFX11-NEXT: s_cbranch_scc0 .LBB7_1
11171118
; GFX11-NEXT: ; %bb.2: ; %bb2
11181119
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)