Skip to content

Commit d01205f

Browse files
committed
Inversion test does not require SCC clobber
Signed-off-by: John Lu <[email protected]>
1 parent adc2b32 commit d01205f

File tree

1 file changed

+11
-31
lines changed

1 file changed

+11
-31
lines changed

llvm/test/CodeGen/AMDGPU/s_cmp_0.ll

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ define amdgpu_ps i32 @shl32(i32 inreg %val0, i32 inreg %val1) {
2323

2424
; s_lshl_b32 sets SCC if result is non-zero.
2525
; Deletion of equal to zero comparison will require inversion of use.
26-
; FIXME: Can't invert because kill flag not set on last use.
2726
define amdgpu_ps i32 @shl32_eq(i32 inreg %val0, i32 inreg %val1) {
2827
; CHECK-LABEL: shl32_eq:
2928
; CHECK: ; %bb.0:
@@ -36,39 +35,20 @@ define amdgpu_ps i32 @shl32_eq(i32 inreg %val0, i32 inreg %val1) {
3635
ret i32 %select
3736
}
3837

39-
; s_lshl_b32 sets SCC if result is non-zero.
40-
; Deletion of equal to zero comparison will require inversion of use.
41-
define amdgpu_ps i32 @shl32_eq_with_scc_clobber(i32 inreg %val0, i32 inreg %val1) {
42-
; CHECK-LABEL: shl32_eq_with_scc_clobber:
43-
; CHECK: ; %bb.0:
44-
; CHECK-NEXT: s_lshl_b32 s0, s0, 1
45-
; CHECK-NEXT: s_cselect_b32 s0, 0, s1
46-
; CHECK-NEXT: s_xor_b32 s0, s0, s1
47-
; CHECK-NEXT: ; return to shader part epilog
48-
%result = shl i32 %val0, 1
49-
%cmp = icmp eq i32 %result, 0
50-
%select = select i1 %cmp, i32 %val1, i32 0
51-
%xor = xor i32 %select, %val1
52-
ret i32 %xor
53-
}
54-
5538
; 64-bit selection will generate two 32-bit selects. Inversion of multiple
5639
; uses is required.
57-
define amdgpu_ps i64 @shl32_eq_multi_use_with_scc_clobber(i32 inreg %val0, i64 inreg %val1) {
58-
; CHECK-LABEL: shl32_eq_multi_use_with_scc_clobber:
40+
define amdgpu_ps i64 @shl32_eq_multi_use(i32 inreg %val0, i64 inreg %val1) {
41+
; CHECK-LABEL: shl32_eq_multi_use:
5942
; CHECK: ; %bb.0:
60-
; CHECK-NEXT: s_mov_b32 s3, s2
61-
; CHECK-NEXT: s_mov_b32 s2, s1
6243
; CHECK-NEXT: s_lshl_b32 s0, s0, 1
63-
; CHECK-NEXT: s_cselect_b32 s1, 0, s3
64-
; CHECK-NEXT: s_cselect_b32 s0, 0, s2
65-
; CHECK-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
44+
; CHECK-NEXT: s_cselect_b32 s2, 0, s2
45+
; CHECK-NEXT: s_cselect_b32 s0, 0, s1
46+
; CHECK-NEXT: s_mov_b32 s1, s2
6647
; CHECK-NEXT: ; return to shader part epilog
6748
%result = shl i32 %val0, 1
6849
%cmp = icmp eq i32 %result, 0
6950
%select = select i1 %cmp, i64 %val1, i64 0
70-
%xor = xor i64 %select, %val1
71-
ret i64 %xor
51+
ret i64 %select
7252
}
7353

7454
define amdgpu_ps i32 @shl64(i64 inreg %val0, i64 inreg %val1) {
@@ -711,14 +691,14 @@ define amdgpu_ps i32 @si_pc_add_rel_offset_must_not_optimize() {
711691
; CHECK-NEXT: s_add_u32 s0, s0, __unnamed_1@rel32@lo+4
712692
; CHECK-NEXT: s_addc_u32 s1, s1, __unnamed_1@rel32@hi+12
713693
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
714-
; CHECK-NEXT: s_cbranch_scc0 .LBB41_2
694+
; CHECK-NEXT: s_cbranch_scc0 .LBB40_2
715695
; CHECK-NEXT: ; %bb.1: ; %endif
716696
; CHECK-NEXT: s_mov_b32 s0, 1
717-
; CHECK-NEXT: s_branch .LBB41_3
718-
; CHECK-NEXT: .LBB41_2: ; %if
697+
; CHECK-NEXT: s_branch .LBB40_3
698+
; CHECK-NEXT: .LBB40_2: ; %if
719699
; CHECK-NEXT: s_mov_b32 s0, 0
720-
; CHECK-NEXT: s_branch .LBB41_3
721-
; CHECK-NEXT: .LBB41_3:
700+
; CHECK-NEXT: s_branch .LBB40_3
701+
; CHECK-NEXT: .LBB40_3:
722702
%cmp = icmp ne ptr addrspace(4) @1, null
723703
br i1 %cmp, label %endif, label %if
724704

0 commit comments

Comments
 (0)