Skip to content

Commit 5ea1edc

Browse files
committed
Implement known-bits optimization.
1 parent 9b896fa commit 5ea1edc

File tree

2 files changed

+62
-13
lines changed

2 files changed

+62
-13
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
#include "llvm/Analysis/CmpInstAnalysis.h"
1616
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
1717
#include "llvm/Analysis/InstructionSimplify.h"
18+
#include "llvm/Analysis/ValueTracking.h"
1819
#include "llvm/IR/ConstantRange.h"
1920
#include "llvm/IR/DerivedTypes.h"
2021
#include "llvm/IR/Instructions.h"
2122
#include "llvm/IR/Intrinsics.h"
2223
#include "llvm/IR/PatternMatch.h"
24+
#include "llvm/Support/KnownBits.h"
2325
#include "llvm/Transforms/InstCombine/InstCombiner.h"
2426
#include "llvm/Transforms/Utils/Local.h"
2527

@@ -3376,9 +3378,13 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
33763378
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
33773379
Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
33783380

3381+
// dbgs() << "LHS0 = " << *LHS0 << "\nLHS1 = " << *LHS1 << '\n';
3382+
// dbgs() << "RHS0 = " << *RHS0 << "\nRHS1 = " << *RHS1 << '\n';
3383+
33793384
const APInt *LHSC = nullptr, *RHSC = nullptr;
33803385
match(LHS1, m_APInt(LHSC));
33813386
match(RHS1, m_APInt(RHSC));
3387+
// dbgs() << "LHSC = " << *LHSC << "\nRHSC = " << *RHSC << '\n';
33823388

33833389
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
33843390
// (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B)
@@ -3575,6 +3581,40 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
35753581
return Builder.createIsFPClass(X, IsAnd ? FPClassTest::fcNormal
35763582
: ~FPClassTest::fcNormal);
35773583

3584+
if (!IsLogical && IsAnd) {
3585+
auto TryCandidate = [&](Value *X) -> Value * {
3586+
if (!X->getType()->isIntegerTy())
3587+
return nullptr;
3588+
3589+
Type *Ty = X->getType();
3590+
unsigned BitWidth = Ty->getScalarSizeInBits();
3591+
3592+
// KnownL and KnownR hold information deduced from the LHS icmp and RHS
3593+
// icmp, respectively.
3594+
KnownBits KnownL(BitWidth), KnownR(BitWidth);
3595+
3596+
computeKnownBitsFromICmpCond(X, LHS, KnownL, Q, /*Invert=*/false);
3597+
computeKnownBitsFromICmpCond(X, RHS, KnownR, Q, /*Invert=*/false);
3598+
3599+
KnownBits Combined = KnownL.unionWith(KnownR);
3600+
3601+
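// Bail out when a single icmp already pins X to a constant; those cases are
// handled by more specific InstCombine folds.
// NOTE(review): known bits are only a necessary condition of each icmp, so
// `X == C` need not imply both compares (e.g. `X u< 3 && (X & 3) == 3` is
// always false, yet the combined bits may describe X == 3). Conflicting
// known bits are also not checked here. Confirm soundness before relying
// on this fold.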
3602+
if (KnownL.isConstant() || KnownR.isConstant())
3603+
return nullptr;
3604+
3605+
if (!Combined.isConstant())
3606+
return nullptr;
3607+
3608+
APInt ConstVal = Combined.getConstant();
3609+
return Builder.CreateICmpEQ(X, ConstantInt::get(Ty, ConstVal));
3610+
};
3611+
3612+
if (Value *Res = TryCandidate(LHS0))
3613+
return Res;
3614+
if (Value *Res = TryCandidate(RHS0))
3615+
return Res;
3616+
}
3617+
35783618
return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
35793619
}
35803620

llvm/test/Transforms/InstCombine/and-or-icmps.ll

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -702,9 +702,9 @@ define i1 @PR42691_10_logical(i32 %x) {
702702

703703
define i1 @substitute_constant_and_eq_eq(i8 %x, i8 %y) {
704704
; CHECK-LABEL: @substitute_constant_and_eq_eq(
705-
; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42
706705
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 42
707-
; CHECK-NEXT: [[R:%.*]] = and i1 [[C1]], [[TMP1]]
706+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y1:%.*]], 42
707+
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]]
708708
; CHECK-NEXT: ret i1 [[R]]
709709
;
710710
%c1 = icmp eq i8 %x, 42
@@ -728,9 +728,9 @@ define i1 @substitute_constant_and_eq_eq_logical(i8 %x, i8 %y) {
728728

729729
define i1 @substitute_constant_and_eq_eq_commute(i8 %x, i8 %y) {
730730
; CHECK-LABEL: @substitute_constant_and_eq_eq_commute(
731-
; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42
732731
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 42
733-
; CHECK-NEXT: [[R:%.*]] = and i1 [[C1]], [[TMP1]]
732+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y1:%.*]], 42
733+
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]]
734734
; CHECK-NEXT: ret i1 [[R]]
735735
;
736736
%c1 = icmp eq i8 %x, 42
@@ -741,9 +741,9 @@ define i1 @substitute_constant_and_eq_eq_commute(i8 %x, i8 %y) {
741741

742742
define i1 @substitute_constant_and_eq_eq_commute_logical(i8 %x, i8 %y) {
743743
; CHECK-LABEL: @substitute_constant_and_eq_eq_commute_logical(
744-
; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[X:%.*]], 42
745744
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[Y:%.*]], 42
746-
; CHECK-NEXT: [[R:%.*]] = and i1 [[C1]], [[TMP1]]
745+
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[Y1:%.*]], 42
746+
; CHECK-NEXT: [[R:%.*]] = and i1 [[TMP1]], [[TMP2]]
747747
; CHECK-NEXT: ret i1 [[R]]
748748
;
749749
%c1 = icmp eq i8 %x, 42
@@ -1392,12 +1392,12 @@ define i1 @bitwise_and_bitwise_and_icmps(i8 %x, i8 %y, i8 %z) {
13921392

13931393
define i1 @bitwise_and_bitwise_and_icmps_comm1(i8 %x, i8 %y, i8 %z) {
13941394
; CHECK-LABEL: @bitwise_and_bitwise_and_icmps_comm1(
1395-
; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[Y:%.*]], 42
1395+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[Y:%.*]], 42
13961396
; CHECK-NEXT: [[Z_SHIFT:%.*]] = shl nuw i8 1, [[Z:%.*]]
13971397
; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[Z_SHIFT]], 1
13981398
; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X:%.*]], [[TMP1]]
1399-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]]
1400-
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[C1]], [[TMP3]]
1399+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8 [[TMP2]], [[TMP1]]
1400+
; CHECK-NEXT: [[AND2:%.*]] = and i1 [[TMP3]], [[TMP4]]
14011401
; CHECK-NEXT: ret i1 [[AND2]]
14021402
;
14031403
%c1 = icmp eq i8 %y, 42
@@ -3725,10 +3725,7 @@ define i1 @merge_range_check_or(i8 %a) {
37253725
; Just a very complicated way of checking if v1 == 0.
37263726
define i1 @complicated_zero_equality_test(i64 %v1) {
37273727
; CHECK-LABEL: @complicated_zero_equality_test(
3728-
; CHECK-NEXT: [[V2:%.*]] = trunc i64 [[V1:%.*]] to i32
3729-
; CHECK-NEXT: [[V3:%.*]] = icmp eq i32 [[V2]], 0
3730-
; CHECK-NEXT: [[V4:%.*]] = icmp ult i64 [[V1]], 4294967296
3731-
; CHECK-NEXT: [[V5:%.*]] = and i1 [[V4]], [[V3]]
3728+
; CHECK-NEXT: [[V5:%.*]] = icmp eq i64 [[V1:%.*]], 0
37323729
; CHECK-NEXT: ret i1 [[V5]]
37333730
;
37343731
%v2 = trunc i64 %v1 to i32
@@ -3737,3 +3734,15 @@ define i1 @complicated_zero_equality_test(i64 %v1) {
37373734
%v5 = and i1 %v4, %v3
37383735
ret i1 %v5
37393736
}
3737+
3738+
define i1 @commuted_complicated_zero_equality_test(i64 %v1) {
3739+
; CHECK-LABEL: @commuted_complicated_zero_equality_test(
3740+
; CHECK-NEXT: [[V5:%.*]] = icmp eq i64 [[V1:%.*]], 0
3741+
; CHECK-NEXT: ret i1 [[V5]]
3742+
;
3743+
%v2 = trunc i64 %v1 to i32
3744+
%v3 = icmp ult i64 %v1, 4294967296 ; 2 ^ 32
3745+
%v4 = icmp eq i32 %v2, 0
3746+
%v5 = and i1 %v4, %v3
3747+
ret i1 %v5
3748+
}

0 commit comments

Comments
 (0)