@@ -23,7 +23,6 @@ define amdgpu_ps i32 @shl32(i32 inreg %val0, i32 inreg %val1) {

; s_lshl_b32 sets SCC if result is non-zero.
; Deletion of equal to zero comparison will require inversion of use.
- ; FIXME: Can't invert because kill flag not set on last use.
define amdgpu_ps i32 @shl32_eq(i32 inreg %val0, i32 inreg %val1) {
; CHECK-LABEL: shl32_eq:
; CHECK: ; %bb.0:
@@ -36,39 +35,20 @@ define amdgpu_ps i32 @shl32_eq(i32 inreg %val0, i32 inreg %val1) {
 ret i32 %select
}

- ; s_lshl_b32 sets SCC if result is non-zero.
- ; Deletion of equal to zero comparison will require inversion of use.
- define amdgpu_ps i32 @shl32_eq_with_scc_clobber(i32 inreg %val0, i32 inreg %val1) {
- ; CHECK-LABEL: shl32_eq_with_scc_clobber:
- ; CHECK: ; %bb.0:
- ; CHECK-NEXT: s_lshl_b32 s0, s0, 1
- ; CHECK-NEXT: s_cselect_b32 s0, 0, s1
- ; CHECK-NEXT: s_xor_b32 s0, s0, s1
- ; CHECK-NEXT: ; return to shader part epilog
- %result = shl i32 %val0, 1
- %cmp = icmp eq i32 %result, 0
- %select = select i1 %cmp, i32 %val1, i32 0
- %xor = xor i32 %select, %val1
- ret i32 %xor
- }
-
; 64-bit selection will generate two 32-bit selects. Inversion of multiple
; uses is required.
- define amdgpu_ps i64 @shl32_eq_multi_use_with_scc_clobber(i32 inreg %val0, i64 inreg %val1) {
- ; CHECK-LABEL: shl32_eq_multi_use_with_scc_clobber:
+ define amdgpu_ps i64 @shl32_eq_multi_use(i32 inreg %val0, i64 inreg %val1) {
+ ; CHECK-LABEL: shl32_eq_multi_use:
; CHECK: ; %bb.0:
- ; CHECK-NEXT: s_mov_b32 s3, s2
- ; CHECK-NEXT: s_mov_b32 s2, s1
; CHECK-NEXT: s_lshl_b32 s0, s0, 1
- ; CHECK-NEXT: s_cselect_b32 s1, 0, s3
- ; CHECK-NEXT: s_cselect_b32 s0, 0, s2
- ; CHECK-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+ ; CHECK-NEXT: s_cselect_b32 s2, 0, s2
+ ; CHECK-NEXT: s_cselect_b32 s0, 0, s1
+ ; CHECK-NEXT: s_mov_b32 s1, s2
; CHECK-NEXT: ; return to shader part epilog
 %result = shl i32 %val0, 1
 %cmp = icmp eq i32 %result, 0
 %select = select i1 %cmp, i64 %val1, i64 0
- %xor = xor i64 %select, %val1
- ret i64 %xor
+ ret i64 %select
}

define amdgpu_ps i32 @shl64(i64 inreg %val0, i64 inreg %val1) {
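
For reference, the pattern these tests exercise reduces to the standalone LLVM IR below (a hypothetical sketch with an illustrative function name, not part of this diff; it assumes the file's existing llc/FileCheck setup). Per the test comments above, s_lshl_b32 already sets SCC when its result is non-zero, so the explicit compare against zero can be deleted once the SCC consumer has its operands inverted.

; Minimal sketch of the tested pattern (hypothetical reduction).
define amdgpu_ps i32 @shl32_eq_sketch(i32 inreg %val0, i32 inreg %val1) {
  %result = shl i32 %val0, 1                  ; s_lshl_b32 defines SCC
  %cmp = icmp eq i32 %result, 0               ; compare expected to fold into the SCC def above
  %select = select i1 %cmp, i32 %val1, i32 0  ; expected to lower to s_cselect_b32 with inverted operands, per the CHECK lines above
  ret i32 %select
}
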
@@ -711,14 +691,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
- ; CHECK-NEXT: s_cbranch_scc0 .LBB41_2
+ ; CHECK-NEXT: s_cbranch_scc0 .LBB40_2
; CHECK-NEXT: ; %bb.1: ; %endif
; CHECK-NEXT: s_mov_b32 s0, 1
- ; CHECK-NEXT: s_branch .LBB41_3
- ; CHECK-NEXT: .LBB41_2: ; %if
+ ; CHECK-NEXT: s_branch .LBB40_3
+ ; CHECK-NEXT: .LBB40_2: ; %if
; CHECK-NEXT: s_mov_b32 s0, 0
- ; CHECK-NEXT: s_branch .LBB41_3
- ; CHECK-NEXT: .LBB41_3:
+ ; CHECK-NEXT: s_branch .LBB40_3
+ ; CHECK-NEXT: .LBB40_3:
 %cmp = icmp ne ptr addrspace(4) @1, null
 br i1 %cmp, label %endif, label %if
