Skip to content

Commit 0eff6c6

Browse files
committed
[InstCombine] add vector support for (A >> C) == (B >> C) --> (A^B) u< (1 << C)
Reviewed By: spatel, RKSimon

Differential Revision: https://reviews.llvm.org/D127398
1 parent 8780630 commit 0eff6c6

File tree

2 files changed

+16
-17
lines changed

2 files changed

+16
-17
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4620,18 +4620,21 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
46204620

46214621
// (A >> C) == (B >> C) --> (A^B) u< (1 << C)
46224622
// For lshr and ashr pairs.
4623-
if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
4624-
match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
4625-
(match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
4626-
match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
4627-
unsigned TypeBits = Cst1->getBitWidth();
4628-
unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
4623+
const APInt *AP1, *AP2;
4624+
if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
4625+
match(Op1, m_OneUse(m_LShr(m_Value(B), m_APIntAllowUndef(AP2))))) ||
4626+
(match(Op0, m_OneUse(m_AShr(m_Value(A), m_APIntAllowUndef(AP1)))) &&
4627+
match(Op1, m_OneUse(m_AShr(m_Value(B), m_APIntAllowUndef(AP2)))))) {
4628+
if (AP1 != AP2)
4629+
return nullptr;
4630+
unsigned TypeBits = AP1->getBitWidth();
4631+
unsigned ShAmt = AP1->getLimitedValue(TypeBits);
46294632
if (ShAmt < TypeBits && ShAmt != 0) {
46304633
ICmpInst::Predicate NewPred =
46314634
Pred == ICmpInst::ICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
46324635
Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
46334636
APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
4634-
return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
4637+
return new ICmpInst(NewPred, Xor, ConstantInt::get(A->getType(), CmpVal));
46354638
}
46364639
}
46374640

llvm/test/Transforms/InstCombine/compare-signs.ll

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,10 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
4747
ret i32 %t3
4848
}
4949

50-
; TODO this should optimize but doesn't due to missing vector support in InstCombiner::foldICmpEquality.
5150
define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
5251
; CHECK-LABEL: @test3vec(
53-
; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
54-
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31>
55-
; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
52+
; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
53+
; CHECK-NEXT: [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], <i32 -1, i32 -1>
5654
; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
5755
; CHECK-NEXT: ret <2 x i32> [[T3]]
5856
;
@@ -65,9 +63,8 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
6563

6664
define <2 x i32> @test3vec_undef1(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
6765
; CHECK-LABEL: @test3vec_undef1(
68-
; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 24, i32 undef>
69-
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 24, i32 24>
70-
; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
66+
; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
67+
; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], <i32 16777216, i32 16777216>
7168
; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
7269
; CHECK-NEXT: ret <2 x i32> [[T3]]
7370
;
@@ -80,9 +77,8 @@ define <2 x i32> @test3vec_undef1(<2 x i32> %a, <2 x i32> %b) nounwind readnone
8077

8178
define <2 x i32> @test3vec_undef2(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
8279
; CHECK-LABEL: @test3vec_undef2(
83-
; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 undef, i32 17>
84-
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 undef, i32 17>
85-
; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
80+
; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
81+
; CHECK-NEXT: [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], <i32 131072, i32 131072>
8682
; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
8783
; CHECK-NEXT: ret <2 x i32> [[T3]]
8884
;

0 commit comments

Comments
 (0)