Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3625,6 +3625,24 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
BaseCost += 1;

// For OR conditions with EQ comparisons, prefer splitting into branches
// (unless CCMP is available). OR+EQ cannot be optimized via bitwise ops,
// unlike OR+NE which becomes (P|Q)!=0. Similarly, don't split signed
// comparisons (SLT, SGT) that can be optimized.
if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a match here like the previous check?

if (BaseCost >= 0 && !Subtarget.hasCCMP() && Opc == Instruction::Or &&
      match(Lhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())) &&
      match(Rhs, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(), m_Value())))
    return {-1,-1,-1};

auto *LCmp = dyn_cast<ICmpInst>(Lhs);
auto *RCmp = dyn_cast<ICmpInst>(Rhs);
if (LCmp && RCmp) {
ICmpInst::Predicate LPred = LCmp->getPredicate();
ICmpInst::Predicate RPred = RCmp->getPredicate();
// Split OR+EQ patterns as they don't have clever optimizations
if (LPred == ICmpInst::ICMP_EQ && RPred == ICmpInst::ICMP_EQ) {
return {-1, -1, -1};
}
}
}

return {BaseCost, BrMergingLikelyBias.getValue(),
BrMergingUnlikelyBias.getValue()};
}
Expand Down
109 changes: 53 additions & 56 deletions llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: callq __ubyte_convert_to_ctype
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: js LBB0_6
; CHECK-NEXT: js LBB0_4
; CHECK-NEXT: ## %bb.1: ## %cond_next.i
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: movq %rbx, %rdi
Expand All @@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: sarl $31, %ecx
; CHECK-NEXT: andl %eax, %ecx
; CHECK-NEXT: cmpl $-2, %ecx
; CHECK-NEXT: je LBB0_10
; CHECK-NEXT: je LBB0_8
; CHECK-NEXT: ## %bb.2: ## %cond_next.i
; CHECK-NEXT: cmpl $-1, %ecx
; CHECK-NEXT: jne LBB0_3
; CHECK-NEXT: LBB0_8: ## %bb4
; CHECK-NEXT: jne LBB0_6
; CHECK-NEXT: LBB0_3: ## %bb4
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 16(%rax), %rax
; CHECK-NEXT: jmp LBB0_9
; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: jmp LBB0_10
; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-2, %eax
; CHECK-NEXT: je LBB0_10
; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: je LBB0_8
; CHECK-NEXT: LBB0_3: ## %bb35
; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit
; CHECK-NEXT: cmpl $-1, %eax
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: LBB0_6: ## %bb35
; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *216(%rax)
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: je LBB0_4
; CHECK-NEXT: ## %bb.12: ## %cond_false.i
; CHECK-NEXT: setne %dil
; CHECK-NEXT: je LBB0_11
; CHECK-NEXT: ## %bb.7: ## %cond_false.i
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: movzbl %sil, %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movl %eax, %r15d
; CHECK-NEXT: testb %cl, %cl
; CHECK-NEXT: setne %al
; CHECK-NEXT: testb %dil, %al
; CHECK-NEXT: jne LBB0_5
; CHECK-NEXT: LBB0_13: ## %cond_true.i200
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: jne LBB0_15
; CHECK-NEXT: ## %bb.14: ## %cond_true14.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: jmp LBB0_16
; CHECK-NEXT: LBB0_10: ## %bb17
; CHECK-NEXT: jne LBB0_12
; CHECK-NEXT: jmp LBB0_14
; CHECK-NEXT: LBB0_8: ## %bb17
; CHECK-NEXT: callq _PyErr_Occurred
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: jne LBB0_23
; CHECK-NEXT: ## %bb.11: ## %cond_next
; CHECK-NEXT: jne LBB0_27
; CHECK-NEXT: ## %bb.9: ## %cond_next
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: movq 80(%rax), %rax
; CHECK-NEXT: LBB0_9: ## %bb4
; CHECK-NEXT: LBB0_10: ## %bb4
; CHECK-NEXT: movq 96(%rax), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: movq %rbx, %rsi
; CHECK-NEXT: callq *40(%rax)
; CHECK-NEXT: jmp LBB0_24
; CHECK-NEXT: LBB0_4: ## %cond_true.i
; CHECK-NEXT: jmp LBB0_28
; CHECK-NEXT: LBB0_11: ## %cond_true.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: testb %sil, %sil
; CHECK-NEXT: sete %al
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: LBB0_12: ## %cond_false.i
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: sete %cl
; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: orb %al, %cl
; CHECK-NEXT: jne LBB0_13
; CHECK-NEXT: LBB0_5: ## %cond_next17.i
; CHECK-NEXT: je LBB0_14
; CHECK-NEXT: ## %bb.13: ## %cond_next17.i
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: divb %dl
; CHECK-NEXT: movzbl %ah, %ebx
; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: jmp LBB0_18
; CHECK-NEXT: LBB0_14: ## %cond_true.i200
; CHECK-NEXT: testb %dl, %dl
; CHECK-NEXT: jne LBB0_17
; CHECK-NEXT: ## %bb.16: ## %cond_true14.i
; CHECK-NEXT: movl $4, %edi
; CHECK-NEXT: callq _feraiseexcept
; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: xorl %ebx, %ebx
; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: callq *224(%rax)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: je LBB0_19
; CHECK-NEXT: ## %bb.17: ## %cond_true61
; CHECK-NEXT: je LBB0_21
; CHECK-NEXT: ## %bb.19: ## %cond_true61
; CHECK-NEXT: movl %eax, %ebp
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi
Expand All @@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: callq *200(%rax)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: js LBB0_23
; CHECK-NEXT: ## %bb.18: ## %cond_next73
; CHECK-NEXT: js LBB0_27
; CHECK-NEXT: ## %bb.20: ## %cond_next73
; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq (%r14), %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
Expand All @@ -149,45 +146,45 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) {
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: callq *232(%rax)
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: jne LBB0_23
; CHECK-NEXT: LBB0_19: ## %cond_next89
; CHECK-NEXT: jne LBB0_27
; CHECK-NEXT: LBB0_21: ## %cond_next89
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq _PyTuple_New
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_23
; CHECK-NEXT: ## %bb.20: ## %cond_next97
; CHECK-NEXT: je LBB0_27
; CHECK-NEXT: ## %bb.22: ## %cond_next97
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12
; CHECK-NEXT: movq (%r12), %rax
; CHECK-NEXT: movq 200(%rax), %rdi
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_21
; CHECK-NEXT: ## %bb.25: ## %cond_next135
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.23: ## %cond_next135
; CHECK-NEXT: movb %r15b, 16(%rax)
; CHECK-NEXT: movq %rax, 24(%r14)
; CHECK-NEXT: movq (%r12), %rax
; CHECK-NEXT: movq 200(%rax), %rdi
; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: callq *304(%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: je LBB0_21
; CHECK-NEXT: ## %bb.26: ## %cond_next182
; CHECK-NEXT: je LBB0_25
; CHECK-NEXT: ## %bb.24: ## %cond_next182
; CHECK-NEXT: movb %bl, 16(%rax)
; CHECK-NEXT: movq %rax, 32(%r14)
; CHECK-NEXT: movq %r14, %rax
; CHECK-NEXT: jmp LBB0_24
; CHECK-NEXT: LBB0_21: ## %cond_true113
; CHECK-NEXT: jmp LBB0_28
; CHECK-NEXT: LBB0_25: ## %cond_true113
; CHECK-NEXT: decq (%r14)
; CHECK-NEXT: jne LBB0_23
; CHECK-NEXT: ## %bb.22: ## %cond_true126
; CHECK-NEXT: jne LBB0_27
; CHECK-NEXT: ## %bb.26: ## %cond_true126
; CHECK-NEXT: movq 8(%r14), %rax
; CHECK-NEXT: movq %r14, %rdi
; CHECK-NEXT: callq *48(%rax)
; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock
; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock
; CHECK-NEXT: LBB0_28: ## %UnifiedReturnBlock
; CHECK-NEXT: addq $32, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/issue-160612.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
extern void subroutine_foo(void);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be deleted?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to include these as references, it is best to add them as comments above their respective IR implementations in the .ll file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done -- deleted

extern void subroutine_bar(void);

/*
 * Combined-condition form: the two zero tests are joined by a single `||`.
 * Calls subroutine_foo() when x or y is zero, otherwise subroutine_bar().
 */
void func_a(int x, int y) {
if (x == 0 || y == 0)
subroutine_foo();
else
subroutine_bar();
}

/*
 * Chained form: x and y are tested in separate if / else-if steps.
 * Calls subroutine_foo() when x is zero or, failing that, when y is zero;
 * calls subroutine_bar() only when both are non-zero.
 */
void func_b(int x, int y) {
if (x == 0)
subroutine_foo();
else if (y == 0)
subroutine_foo();
else
subroutine_bar();
}

74 changes: 74 additions & 0 deletions llvm/test/CodeGen/X86/issue-160612.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s

; Test for issue #160612: OR conditions in branches should use multiple branches
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) rename issue-160612.ll -> pr160612.ll (pr stands for problem report not pull request)

; instead of materializing booleans with SETCC when no special optimizations apply.

declare void @subroutine_foo()
declare void @subroutine_bar()

; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH
; instead of using two conditional branches directly.
define void @func_a(i32 noundef %x, i32 noundef %y) {
; CHECK-LABEL: func_a:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL
; The assertions above expect a test + conditional jump per operand, with no
; setcc materializing the combined boolean.
entry:
; Two eq-with-zero compares are merged by `or` into one i1 that feeds a single
; conditional branch.
%cmp = icmp eq i32 %x, 0
%cmp1 = icmp eq i32 %y, 0
%or.cond = or i1 %cmp, %cmp1
br i1 %or.cond, label %if.then, label %if.else

; Taken when %x or %y is zero.
if.then:
tail call void @subroutine_foo()
br label %if.end

; Taken when both %x and %y are non-zero.
if.else:
tail call void @subroutine_bar()
br label %if.end

if.end:
ret void
}

; Reference implementation that already generated optimal code.
; This should continue to generate the same optimal code.
define void @func_b(i32 noundef %x, i32 noundef %y) {
; CHECK-LABEL: func_b:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
; CHECK-NEXT: # %bb.1: # %if.else
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL
; CHECK-NEXT: # %bb.2: # %if.else3
; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL
; Here each compare already has its own conditional branch in the IR; there is
; no `or` combining the two conditions.
entry:
; First test: %x == 0 goes straight to the subroutine_foo call.
%cmp = icmp eq i32 %x, 0
br i1 %cmp, label %if.then, label %if.else

if.then:
tail call void @subroutine_foo()
br label %if.end4

; Second test, reached only when %x is non-zero: %y == 0 also calls
; subroutine_foo.
if.else:
%cmp1 = icmp eq i32 %y, 0
br i1 %cmp1, label %if.then2, label %if.else3

if.then2:
tail call void @subroutine_foo()
br label %if.end4

; Reached only when both %x and %y are non-zero.
if.else3:
tail call void @subroutine_bar()
br label %if.end4

if.end4:
ret void
}
Loading
Loading