Skip to content

Commit 6c12d87

Browse files
manik-mukakadutta
authored and committed
[X86] logical AND and OR in if-conditionals can turn to multiple branch instructions (llvm#162041)
Addresses llvm#160612
1 parent f0ff17c commit 6c12d87

File tree

4 files changed

+161
-84
lines changed

4 files changed

+161
-84
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3624,6 +3624,16 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
36243624
match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
36253625
match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
36263626
BaseCost += 1;
3627+
3628+
// For OR conditions with EQ comparisons, prefer splitting into branches
3629+
// (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
3630+
// unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
3631+
// comparisons (SLT, SGT) that can be optimized.
3632+
if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or &&
3633+
match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
3634+
match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
3635+
return {-1, -1, -1};
3636+
36273637
return {BaseCost, BrMergingLikelyBias.getValue(),
36283638
BrMergingUnlikelyBias.getValue()};
36293639
}

llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll

Lines changed: 53 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
4444
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
4545
; CHECK-NEXT: callq __ubyte_convert_to_ctype
4646
; CHECK-NEXT: testl %eax, %eax
47-
; CHECK-NEXT: js LBB0_6
47+
; CHECK-NEXT: js LBB0_4
4848
; CHECK-NEXT: ## %bb.1: ## %cond_next.i
4949
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
5050
; CHECK-NEXT: movq %rbx, %rdi
@@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
5353
; CHECK-NEXT: sarl $31, %ecx
5454
; CHECK-NEXT: andl %eax, %ecx
5555
; CHECK-NEXT: cmpl $-2, %ecx
56-
; CHECK-NEXT: je LBB0_10
56+
; CHECK-NEXT: je LBB0_8
5757
; CHECK-NEXT: ## %bb.2: ## %cond_next.i
5858
; CHECK-NEXT: cmpl $-1, %ecx
59-
; CHECK-NEXT: jne LBB0_3
60-
; CHECK-NEXT: LBB0_8: ## %bb4
59+
; CHECK-NEXT: jne LBB0_6
60+
; CHECK-NEXT: LBB0_3: ## %bb4
6161
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
6262
; CHECK-NEXT: movq (%rax), %rax
6363
; CHECK-NEXT: movq 16(%rax), %rax
64-
; CHECK-NEXT: jmp LBB0_9
65-
; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit
64+
; CHECK-NEXT: jmp LBB0_10
65+
; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit
6666
; CHECK-NEXT: cmpl $-2, %eax
67-
; CHECK-NEXT: je LBB0_10
68-
; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit
69-
; CHECK-NEXT: cmpl $-1, %eax
7067
; CHECK-NEXT: je LBB0_8
71-
; CHECK-NEXT: LBB0_3: ## %bb35
68+
; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit
69+
; CHECK-NEXT: cmpl $-1, %eax
70+
; CHECK-NEXT: je LBB0_3
71+
; CHECK-NEXT: LBB0_6: ## %bb35
7272
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14
7373
; CHECK-NEXT: movq (%r14), %rax
7474
; CHECK-NEXT: callq *216(%rax)
7575
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
7676
; CHECK-NEXT: testb %dl, %dl
77-
; CHECK-NEXT: je LBB0_4
78-
; CHECK-NEXT: ## %bb.12: ## %cond_false.i
79-
; CHECK-NEXT: setne %dil
77+
; CHECK-NEXT: je LBB0_11
78+
; CHECK-NEXT: ## %bb.7: ## %cond_false.i
8079
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
8180
; CHECK-NEXT: movzbl %sil, %ecx
8281
; CHECK-NEXT: movl %ecx, %eax
8382
; CHECK-NEXT: divb %dl
8483
; CHECK-NEXT: movl %eax, %r15d
8584
; CHECK-NEXT: testb %cl, %cl
86-
; CHECK-NEXT: setne %al
87-
; CHECK-NEXT: testb %dil, %al
88-
; CHECK-NEXT: jne LBB0_5
89-
; CHECK-NEXT: LBB0_13: ## %cond_true.i200
90-
; CHECK-NEXT: testb %dl, %dl
91-
; CHECK-NEXT: jne LBB0_15
92-
; CHECK-NEXT: ## %bb.14: ## %cond_true14.i
93-
; CHECK-NEXT: movl $4, %edi
94-
; CHECK-NEXT: callq _feraiseexcept
95-
; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit
96-
; CHECK-NEXT: xorl %ebx, %ebx
97-
; CHECK-NEXT: jmp LBB0_16
98-
; CHECK-NEXT: LBB0_10: ## %bb17
85+
; CHECK-NEXT: jne LBB0_12
86+
; CHECK-NEXT: jmp LBB0_14
87+
; CHECK-NEXT: LBB0_8: ## %bb17
9988
; CHECK-NEXT: callq _PyErr_Occurred
10089
; CHECK-NEXT: testq %rax, %rax
101-
; CHECK-NEXT: jne LBB0_23
102-
; CHECK-NEXT: ## %bb.11: ## %cond_next
90+
; CHECK-NEXT: jne LBB0_27
91+
; CHECK-NEXT: ## %bb.9: ## %cond_next
10392
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
10493
; CHECK-NEXT: movq (%rax), %rax
10594
; CHECK-NEXT: movq 80(%rax), %rax
106-
; CHECK-NEXT: LBB0_9: ## %bb4
95+
; CHECK-NEXT: LBB0_10: ## %bb4
10796
; CHECK-NEXT: movq 96(%rax), %rax
10897
; CHECK-NEXT: movq %r14, %rdi
10998
; CHECK-NEXT: movq %rbx, %rsi
11099
; CHECK-NEXT: callq *40(%rax)
111-
; CHECK-NEXT: jmp LBB0_24
112-
; CHECK-NEXT: LBB0_4: ## %cond_true.i
100+
; CHECK-NEXT: jmp LBB0_28
101+
; CHECK-NEXT: LBB0_11: ## %cond_true.i
113102
; CHECK-NEXT: movl $4, %edi
114103
; CHECK-NEXT: callq _feraiseexcept
115104
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
116105
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
106+
; CHECK-NEXT: xorl %r15d, %r15d
117107
; CHECK-NEXT: testb %sil, %sil
118-
; CHECK-NEXT: sete %al
108+
; CHECK-NEXT: je LBB0_14
109+
; CHECK-NEXT: LBB0_12: ## %cond_false.i
119110
; CHECK-NEXT: testb %dl, %dl
120-
; CHECK-NEXT: sete %cl
121-
; CHECK-NEXT: xorl %r15d, %r15d
122-
; CHECK-NEXT: orb %al, %cl
123-
; CHECK-NEXT: jne LBB0_13
124-
; CHECK-NEXT: LBB0_5: ## %cond_next17.i
111+
; CHECK-NEXT: je LBB0_14
112+
; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
125113
; CHECK-NEXT: movzbl %sil, %eax
126114
; CHECK-NEXT: divb %dl
127115
; CHECK-NEXT: movzbl %ah, %ebx
128-
; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit
116+
; CHECK-NEXT: jmp LBB0_18
117+
; CHECK-NEXT: LBB0_14: ## %cond_true.i200
118+
; CHECK-NEXT: testb %dl, %dl
119+
; CHECK-NEXT: jne LBB0_17
120+
; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
121+
; CHECK-NEXT: movl $4, %edi
122+
; CHECK-NEXT: callq _feraiseexcept
123+
; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
124+
; CHECK-NEXT: xorl %ebx, %ebx
125+
; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
129126
; CHECK-NEXT: movq (%r14), %rax
130127
; CHECK-NEXT: callq *224(%rax)
131128
; CHECK-NEXT: testl %eax, %eax
132-
; CHECK-NEXT: je LBB0_19
133-
; CHECK-NEXT: ## %bb.17: ## %cond_true61
129+
; CHECK-NEXT: je LBB0_21
130+
; CHECK-NEXT: ## %bb.19: ## %cond_true61
134131
; CHECK-NEXT: movl %eax, %ebp
135132
; CHECK-NEXT: movq (%r14), %rax
136133
; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi
@@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
139136
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
140137
; CHECK-NEXT: callq *200(%rax)
141138
; CHECK-NEXT: testl %eax, %eax
142-
; CHECK-NEXT: js LBB0_23
143-
; CHECK-NEXT: ## %bb.18: ## %cond_next73
139+
; CHECK-NEXT: js LBB0_27
140+
; CHECK-NEXT: ## %bb.20: ## %cond_next73
144141
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
145142
; CHECK-NEXT: movq (%r14), %rax
146143
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
@@ -149,45 +146,45 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
149146
; CHECK-NEXT: movl %ebp, %edx
150147
; CHECK-NEXT: callq *232(%rax)
151148
; CHECK-NEXT: testl %eax, %eax
152-
; CHECK-NEXT: jne LBB0_23
153-
; CHECK-NEXT: LBB0_19: ## %cond_next89
149+
; CHECK-NEXT: jne LBB0_27
150+
; CHECK-NEXT: LBB0_21: ## %cond_next89
154151
; CHECK-NEXT: movl $2, %edi
155152
; CHECK-NEXT: callq _PyTuple_New
156153
; CHECK-NEXT: testq %rax, %rax
157-
; CHECK-NEXT: je LBB0_23
158-
; CHECK-NEXT: ## %bb.20: ## %cond_next97
154+
; CHECK-NEXT: je LBB0_27
155+
; CHECK-NEXT: ## %bb.22: ## %cond_next97
159156
; CHECK-NEXT: movq %rax, %r14
160157
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12
161158
; CHECK-NEXT: movq (%r12), %rax
162159
; CHECK-NEXT: movq 200(%rax), %rdi
163160
; CHECK-NEXT: xorl %esi, %esi
164161
; CHECK-NEXT: callq *304(%rdi)
165162
; CHECK-NEXT: testq %rax, %rax
166-
; CHECK-NEXT: je LBB0_21
167-
; CHECK-NEXT: ## %bb.25: ## %cond_next135
163+
; CHECK-NEXT: je LBB0_25
164+
; CHECK-NEXT: ## %bb.23: ## %cond_next135
168165
; CHECK-NEXT: movb %r15b, 16(%rax)
169166
; CHECK-NEXT: movq %rax, 24(%r14)
170167
; CHECK-NEXT: movq (%r12), %rax
171168
; CHECK-NEXT: movq 200(%rax), %rdi
172169
; CHECK-NEXT: xorl %esi, %esi
173170
; CHECK-NEXT: callq *304(%rdi)
174171
; CHECK-NEXT: testq %rax, %rax
175-
; CHECK-NEXT: je LBB0_21
176-
; CHECK-NEXT: ## %bb.26: ## %cond_next182
172+
; CHECK-NEXT: je LBB0_25
173+
; CHECK-NEXT: ## %bb.24: ## %cond_next182
177174
; CHECK-NEXT: movb %bl, 16(%rax)
178175
; CHECK-NEXT: movq %rax, 32(%r14)
179176
; CHECK-NEXT: movq %r14, %rax
180-
; CHECK-NEXT: jmp LBB0_24
181-
; CHECK-NEXT: LBB0_21: ## %cond_true113
177+
; CHECK-NEXT: jmp LBB0_28
178+
; CHECK-NEXT: LBB0_25: ## %cond_true113
182179
; CHECK-NEXT: decq (%r14)
183-
; CHECK-NEXT: jne LBB0_23
184-
; CHECK-NEXT: ## %bb.22: ## %cond_true126
180+
; CHECK-NEXT: jne LBB0_27
181+
; CHECK-NEXT: ## %bb.26: ## %cond_true126
185182
; CHECK-NEXT: movq 8(%r14), %rax
186183
; CHECK-NEXT: movq %r14, %rdi
187184
; CHECK-NEXT: callq *48(%rax)
188-
; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock
185+
; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
189186
; CHECK-NEXT: xorl %eax, %eax
190-
; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock
187+
; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
191188
; CHECK-NEXT: addq $32, %rsp
192189
; CHECK-NEXT: popq %rbx
193190
; CHECK-NEXT: popq %r12

llvm/test/CodeGen/X86/pr160612.ll

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s
3+
4+
; Test for issue #160612: OR conditions in branches should use multiple branches
5+
; instead of materializing booleans with SETCC when no special optimizations apply.
6+
7+
declare void @subroutine_foo()
8+
declare void @subroutine_bar()
9+
10+
; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH
11+
; instead of using two conditional branches directly.
12+
define void @func_a(i32 noundef %x, i32 noundef %y) {
13+
; CHECK-LABEL: func_a:
14+
; CHECK: # %bb.0: # %entry
15+
; CHECK-NEXT: testl %edi, %edi
16+
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
17+
; CHECK-NEXT: # %bb.1: # %entry
18+
; CHECK-NEXT: testl %esi, %esi
19+
; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL
20+
; CHECK-NEXT: # %bb.2: # %if.then
21+
; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL
22+
entry:
23+
%cmp = icmp eq i32 %x, 0
24+
%cmp1 = icmp eq i32 %y, 0
25+
%or.cond = or i1 %cmp, %cmp1
26+
br i1 %or.cond, label %if.then, label %if.else
27+
28+
if.then:
29+
tail call void @subroutine_foo()
30+
br label %if.end
31+
32+
if.else:
33+
tail call void @subroutine_bar()
34+
br label %if.end
35+
36+
if.end:
37+
ret void
38+
}
39+
40+
; Reference implementation that already generated optimal code.
41+
; This should continue to generate the same optimal code.
42+
define void @func_b(i32 noundef %x, i32 noundef %y) {
43+
; CHECK-LABEL: func_b:
44+
; CHECK: # %bb.0: # %entry
45+
; CHECK-NEXT: testl %edi, %edi
46+
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
47+
; CHECK-NEXT: # %bb.1: # %if.else
48+
; CHECK-NEXT: testl %esi, %esi
49+
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
50+
; CHECK-NEXT: # %bb.2: # %if.else3
51+
; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL
52+
entry:
53+
%cmp = icmp eq i32 %x, 0
54+
br i1 %cmp, label %if.then, label %if.else
55+
56+
if.then:
57+
tail call void @subroutine_foo()
58+
br label %if.end4
59+
60+
if.else:
61+
%cmp1 = icmp eq i32 %y, 0
62+
br i1 %cmp1, label %if.then2, label %if.else3
63+
64+
if.then2:
65+
tail call void @subroutine_foo()
66+
br label %if.end4
67+
68+
if.else3:
69+
tail call void @subroutine_bar()
70+
br label %if.end4
71+
72+
if.end4:
73+
ret void
74+
}

0 commit comments

Comments
 (0)