Skip to content

Commit d8b5d15

Browse files
committed
fold "icmp eq (num + (val - 1)) & -val, num" to "icmp eq 0, (and num, val - 1)"
1 parent 90e8c8e commit d8b5d15

File tree

3 files changed

+155
-0
lines changed

3 files changed

+155
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,67 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
13201320
return nullptr;
13211321
}
13221322

1323+
// Fold icmp eq (num + (val - 1)) & -val, num
1324+
// to
1325+
// icmp eq 0, (and num, val - 1)
1326+
// For value being power of two
1327+
Instruction *InstCombinerImpl::foldNextMultiply(ICmpInst &Cmp) {
1328+
Value *Op0 = Cmp.getOperand(0), *Op1 = Cmp.getOperand(1);
1329+
Value *Neg, *Add, *Num, *Mask, *Value;
1330+
CmpInst::Predicate Pred = Cmp.getPredicate();
1331+
const APInt *NegConst, *MaskConst, *NumCost;
1332+
1333+
if (Pred != ICmpInst::ICMP_EQ)
1334+
return nullptr;
1335+
1336+
// Match num + neg
1337+
if (!match(Op0, m_And(m_Value(Add), m_Value(Neg))))
1338+
return nullptr;
1339+
1340+
// Match num & mask
1341+
if (!match(Add, m_Add(m_Value(Num), m_Value(Mask))))
1342+
return nullptr;
1343+
1344+
// Check the constant case
1345+
if (match(Neg, m_APInt(NegConst)) && match(Mask, m_APInt(MaskConst))) {
1346+
// Mask + 1 should be a power-of-two
1347+
if (!(*MaskConst + 1).isPowerOf2())
1348+
return nullptr;
1349+
1350+
// Neg = -(Mask + 1)
1351+
if (*NegConst != -(*MaskConst + 1))
1352+
return nullptr;
1353+
} else {
1354+
// Match neg = sub 0, val
1355+
if (!match(Neg, m_Sub(m_Zero(), m_Value(Value))))
1356+
return nullptr;
1357+
1358+
// mask = %val - 1, which can be represented as sub %val, 1 or add %val, -1
1359+
if (!match(Mask, m_Add(m_Value(Value), m_AllOnes())) &&
1360+
!match(Mask, m_Sub(m_Value(Value), m_One())))
1361+
return nullptr;
1362+
1363+
// Value should be a known power-of-two.
1364+
if (!isKnownToBeAPowerOfTwo(Value, false, &Cmp))
1365+
return nullptr;
1366+
}
1367+
1368+
// Guard against weird special-case where Op1 gets optimized to constant. Leave it constant
1369+
// fonder.
1370+
if (match(Op1, m_APInt(NumCost)))
1371+
return nullptr;
1372+
1373+
if (!match(Op1, m_Value(Num)))
1374+
return nullptr;
1375+
1376+
// Create new icmp eq (num & (val - 1)), 0
1377+
auto NewAnd = Builder.CreateAnd(Num, Mask);
1378+
auto Zero = llvm::Constant::getNullValue(Num->getType());
1379+
auto ICmp = Builder.CreateICmp(CmpInst::ICMP_EQ, NewAnd, Zero);
1380+
1381+
return replaceInstUsesWith(Cmp, ICmp);
1382+
}
1383+
13231384
/// Fold icmp Pred X, C.
13241385
/// TODO: This code structure does not make sense. The saturating add fold
13251386
/// should be moved to some other helper and extended as noted below (it is also
@@ -7644,6 +7705,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
76447705
if (Instruction *Res = foldICmpUsingKnownBits(I))
76457706
return Res;
76467707

7708+
if (Instruction *Res = foldNextMultiply(I))
7709+
return Res;
7710+
76477711
// Test if the ICmpInst instruction is used exclusively by a select as
76487712
// part of a minimum or maximum operation. If so, refrain from doing
76497713
// any other folding. This helps out other analyses which understand

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
721721
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
722722
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
723723
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
724+
Instruction *foldNextMultiply(ICmpInst &Cmp);
724725
Instruction *foldICmpUsingBoolRange(ICmpInst &I);
725726
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
726727
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);

llvm/test/Transforms/InstCombine/icmp-add.ll

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3300,3 +3300,93 @@ entry:
33003300
%cmp = icmp ult i32 %add, 253
33013301
ret i1 %cmp
33023302
}
3303+
3304+
; Positive test, non-constant case: %val is asserted to be a power of two via
; llvm.assume(ctpop(%val) == 1) and the mask is spelled `sub i32 %val, 1`.
; The expected output compares `and <num>, (<val> + -1)` against 0.
; NOTE(review): the FileCheck variable names below do not line up with the IR
; names (e.g. [[NUM]] captures the ctpop operand, which is %val in the input,
; and [[NEG]] captures the `add ..., -1` mask) - consider regenerating the
; checks so the names are not misleading.
define i1 @val_is_aligend_sub(i32 %num, i32 %val) {
3305+
; CHECK-LABEL: @val_is_aligend_sub(
3306+
; CHECK-NEXT: [[TMP1:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[NUM:%.*]])
3307+
; CHECK-NEXT: [[POW:%.*]] = icmp eq i32 [[TMP1]], 1
3308+
; CHECK-NEXT: call void @llvm.assume(i1 [[POW]])
3309+
; CHECK-NEXT: [[NEG:%.*]] = add i32 [[NUM]], -1
3310+
; CHECK-NEXT: [[_2_SROA_0_0:%.*]] = and i32 [[NUM_BIASED:%.*]], [[NEG]]
3311+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[_2_SROA_0_0]], 0
3312+
; CHECK-NEXT: ret i1 [[_0]]
3313+
;
3314+
%1 = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %val)
3315+
%pow = icmp eq i32 %1, 1
3316+
call void @llvm.assume(i1 %pow)
3317+
3318+
%mask = sub i32 %val, 1
3319+
%neg = sub nsw i32 0, %val
3320+
3321+
%num.biased = add i32 %num, %mask
3322+
%_2.sroa.0.0 = and i32 %num.biased, %neg
3323+
%_0 = icmp eq i32 %_2.sroa.0.0, %num
3324+
ret i1 %_0
3325+
}
3326+
3327+
; Positive test, non-constant case: same as the `sub` variant of this test but
; with the mask spelled `add i32 %val, -1`. %val is asserted to be a power of
; two via llvm.assume(ctpop(%val) == 1), and the expected output compares
; `and <num>, (<val> + -1)` against 0.
; NOTE(review): the FileCheck variable names below do not line up with the IR
; names ([[NUM]] captures the ctpop operand, which is %val in the input).
define i1 @val_is_aligend_add(i32 %num, i32 %val) {
3328+
; CHECK-LABEL: @val_is_aligend_add(
3329+
; CHECK-NEXT: [[TMP1:%.*]] = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 [[NUM:%.*]])
3330+
; CHECK-NEXT: [[POW:%.*]] = icmp eq i32 [[TMP1]], 1
3331+
; CHECK-NEXT: call void @llvm.assume(i1 [[POW]])
3332+
; CHECK-NEXT: [[NEG:%.*]] = add i32 [[NUM]], -1
3333+
; CHECK-NEXT: [[_2_SROA_0_0:%.*]] = and i32 [[NUM_BIASED:%.*]], [[NEG]]
3334+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[_2_SROA_0_0]], 0
3335+
; CHECK-NEXT: ret i1 [[_0]]
3336+
;
3337+
%1 = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %val)
3338+
%pow = icmp eq i32 %1, 1
3339+
call void @llvm.assume(i1 %pow)
3340+
3341+
%mask = add i32 %val, -1
3342+
%neg = sub nsw i32 0, %val
3343+
3344+
%num.biased = add i32 %num, %mask
3345+
%_2.sroa.0.0 = and i32 %num.biased, %neg
3346+
%_0 = icmp eq i32 %_2.sroa.0.0, %num
3347+
ret i1 %_0
3348+
}
3349+
3350+
; Positive test, constant case: 4095 + 1 = 4096 is a power of two and the and
; mask is -4096, so the round-up-and-compare is expected to become
; `(%num & 4095) == 0`.
define i1 @val_is_aligend_const_pow2(i32 %num) {
3351+
; CHECK-LABEL: @val_is_aligend_const_pow2(
3352+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3353+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3354+
; CHECK-NEXT: ret i1 [[_0]]
3355+
;
3356+
%num.biased = add i32 %num, 4095
3357+
%_2.sroa.0.0 = and i32 %num.biased, -4096
3358+
%_0 = icmp eq i32 %_2.sroa.0.0, %num
3359+
ret i1 %_0
3360+
}
3361+
3362+
; Negative test, constant case: the fold must not fire when the constant is
; not a power of two. Here 6 + 1 = 7, so the add/and/icmp sequence is expected
; to be left unchanged.
3363+
define i1 @val_is_aligend_const_non-pow2(i32 %num) {
3364+
; CHECK-LABEL: @val_is_aligend_const_non-pow2(
3365+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 6
3366+
; CHECK-NEXT: [[_2_SROA_0_0:%.*]] = and i32 [[NUM_BIASED]], -7
3367+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[_2_SROA_0_0]], [[NUM]]
3368+
; CHECK-NEXT: ret i1 [[_0]]
3369+
;
3370+
%num.biased = add i32 %num, 6
3371+
%_2.sroa.0.0 = and i32 %num.biased, -7
3372+
%_0 = icmp eq i32 %_2.sroa.0.0, %num
3373+
ret i1 %_0
3374+
}
3375+
3376+
; Negative test, non-constant case: there is no llvm.assume (or anything else
; in this function) establishing that %val is a power of two, so the
; add/and/icmp sequence is expected to be left unchanged.
define i1 @val_is_aligend_non_pow(i32 %num, i32 %val) {
3377+
; CHECK-LABEL: @val_is_aligend_non_pow(
3378+
; CHECK-NEXT: [[MASK:%.*]] = add i32 [[VAL:%.*]], -1
3379+
; CHECK-NEXT: [[NEG:%.*]] = sub nsw i32 0, [[VAL]]
3380+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], [[MASK]]
3381+
; CHECK-NEXT: [[_2_SROA_0_0:%.*]] = and i32 [[NUM_BIASED]], [[NEG]]
3382+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[_2_SROA_0_0]], [[NUM]]
3383+
; CHECK-NEXT: ret i1 [[_0]]
3384+
;
3385+
%mask = add i32 %val, -1
3386+
%neg = sub nsw i32 0, %val
3387+
3388+
%num.biased = add i32 %num, %mask
3389+
%_2.sroa.0.0 = and i32 %num.biased, %neg
3390+
%_0 = icmp eq i32 %_2.sroa.0.0, %num
3391+
ret i1 %_0
3392+
}

0 commit comments

Comments
 (0)