Skip to content

Commit 3014422

Browse files
authored
[llvm] [InstCombine] fold "icmp eq (X + (V - 1)) & -V, X" to "icmp eq (and X, V - 1), 0" (#152851)
This fold optimizes, for a power-of-two `val`:

```llvm
define i1 @src(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %neg = sub nsw i32 0, %val
  %num.biased = add i32 %num, %mask
  %_2.sroa.0.0 = and i32 %num.biased, %neg
  %_0 = icmp eq i32 %_2.sroa.0.0, %num
  ret i1 %_0
}
```

to:

```llvm
define i1 @tgt(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %tmp = and i32 %num, %mask
  %ret = icmp eq i32 %tmp, 0
  ret i1 %ret
}
```

Observed in real life for the following Rust code:

```rust
pub fn is_aligned(num: usize) -> bool {
    num.next_multiple_of(1 << 12) == num
}
```

which verifies that `num` is aligned to 4096. Alive2 proof: https://alive2.llvm.org/ce/z/QisECm
1 parent f92afe7 commit 3014422

File tree

3 files changed

+179
-0
lines changed

3 files changed

+179
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,35 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
13201320
return nullptr;
13211321
}
13221322

1323+
/// Fold icmp eq (num + mask) & ~mask, num
1324+
/// to
1325+
/// icmp eq (and num, mask), 0
1326+
/// Where mask is a low bit mask.
1327+
Instruction *InstCombinerImpl::foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp) {
1328+
Value *Num;
1329+
CmpPredicate Pred;
1330+
const APInt *Mask, *Neg;
1331+
1332+
if (!match(&Cmp,
1333+
m_c_ICmp(Pred, m_Value(Num),
1334+
m_OneUse(m_c_And(m_OneUse(m_c_Add(m_Deferred(Num),
1335+
m_LowBitMask(Mask))),
1336+
m_APInt(Neg))))))
1337+
return nullptr;
1338+
1339+
if (*Neg != ~*Mask)
1340+
return nullptr;
1341+
1342+
if (!ICmpInst::isEquality(Pred))
1343+
return nullptr;
1344+
1345+
// Create new icmp eq (num & mask), 0
1346+
auto *NewAnd = Builder.CreateAnd(Num, *Mask);
1347+
auto *Zero = Constant::getNullValue(Num->getType());
1348+
1349+
return new ICmpInst(Pred, NewAnd, Zero);
1350+
}
1351+
13231352
/// Fold icmp Pred X, C.
13241353
/// TODO: This code structure does not make sense. The saturating add fold
13251354
/// should be moved to some other helper and extended as noted below (it is also
@@ -7644,6 +7673,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
76447673
if (Instruction *Res = foldICmpUsingKnownBits(I))
76457674
return Res;
76467675

7676+
if (Instruction *Res = foldIsMultipleOfAPowerOfTwo(I))
7677+
return Res;
7678+
76477679
// Test if the ICmpInst instruction is used exclusively by a select as
76487680
// part of a minimum or maximum operation. If so, refrain from doing
76497681
// any other folding. This helps out other analyses which understand

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
721721
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
722722
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
723723
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
724+
Instruction *foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp);
724725
Instruction *foldICmpUsingBoolRange(ICmpInst &I);
725726
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
726727
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);

llvm/test/Transforms/InstCombine/icmp-add.ll

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3300,3 +3300,149 @@ entry:
33003300
%cmp = icmp ult i32 %add, 253
33013301
ret i1 %cmp
33023302
}
3303+
3304+
; PR 152851
3305+
3306+
define i1 @val_is_aligend_const_pow2(i32 %num) {
3307+
; CHECK-LABEL: @val_is_aligend_const_pow2(
3308+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3309+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3310+
; CHECK-NEXT: ret i1 [[_0]]
3311+
;
3312+
%num.biased = add i32 %num, 4095
3313+
%num.masked = and i32 %num.biased, -4096
3314+
%_0 = icmp eq i32 %num.masked, %num
3315+
ret i1 %_0
3316+
}
3317+
3318+
define i1 @val_is_aligend_const_pow2_add_commute(i32 %num) {
3319+
; CHECK-LABEL: @val_is_aligend_const_pow2_add_commute(
3320+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3321+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3322+
; CHECK-NEXT: ret i1 [[_0]]
3323+
;
3324+
%num.biased = add i32 4095, %num
3325+
%num.masked = and i32 %num.biased, -4096
3326+
%_0 = icmp eq i32 %num.masked, %num
3327+
ret i1 %_0
3328+
}
3329+
3330+
define i1 @val_is_aligend_const_pow2_and_commute(i32 %num) {
3331+
; CHECK-LABEL: @val_is_aligend_const_pow2_and_commute(
3332+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3333+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3334+
; CHECK-NEXT: ret i1 [[_0]]
3335+
;
3336+
%num.biased = add i32 %num, 4095
3337+
%num.masked = and i32 -4096, %num.biased
3338+
%_0 = icmp eq i32 %num.masked, %num
3339+
ret i1 %_0
3340+
}
3341+
3342+
define i1 @val_is_aligend_const_pow2_icm_commute(i32 %num) {
3343+
; CHECK-LABEL: @val_is_aligend_const_pow2_icm_commute(
3344+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3345+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3346+
; CHECK-NEXT: ret i1 [[_0]]
3347+
;
3348+
%num.biased = add i32 %num, 4095
3349+
%num.masked = and i32 %num.biased, -4096
3350+
%_0 = icmp eq i32 %num, %num.masked
3351+
ret i1 %_0
3352+
}
3353+
3354+
; Should not work for non-power-of-two cases
3355+
define i1 @val_is_aligend_const_non_pow2(i32 %num) {
3356+
; CHECK-LABEL: @val_is_aligend_const_non_pow2(
3357+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 6
3358+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -7
3359+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3360+
; CHECK-NEXT: ret i1 [[_0]]
3361+
;
3362+
%num.biased = add i32 %num, 6
3363+
%num.masked = and i32 %num.biased, -7
3364+
%_0 = icmp eq i32 %num.masked, %num
3365+
ret i1 %_0
3366+
}
3367+
3368+
define i1 @val_is_aligend_const_pow2_multiuse(i32 %num) {
3369+
; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse(
3370+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3371+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
3372+
; CHECK-NEXT: call void @use(i32 [[NUM_MASKED]])
3373+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3374+
; CHECK-NEXT: ret i1 [[_0]]
3375+
;
3376+
%num.biased = add i32 %num, 4095
3377+
%num.masked = and i32 %num.biased, -4096
3378+
call void @use(i32 %num.masked)
3379+
%_0 = icmp eq i32 %num.masked, %num
3380+
ret i1 %_0
3381+
}
3382+
3383+
; Applies since number of instructions do not change
3384+
define i1 @val_is_aligend_const_pow2_multiuse1(i32 %num) {
3385+
; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse1(
3386+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3387+
; CHECK-NEXT: call void @use(i32 [[NUM_BIASED]])
3388+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
3389+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3390+
; CHECK-NEXT: ret i1 [[_0]]
3391+
;
3392+
%num.biased = add i32 %num, 4095
3393+
call void @use(i32 %num.biased)
3394+
%num.masked = and i32 %num.biased, -4096
3395+
%_0 = icmp eq i32 %num.masked, %num
3396+
ret i1 %_0
3397+
}
3398+
3399+
define i1 @val_is_aligend_const_pow2_ne(i32 %num) {
3400+
; CHECK-LABEL: @val_is_aligend_const_pow2_ne(
3401+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3402+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[TMP1]], 0
3403+
; CHECK-NEXT: ret i1 [[_0]]
3404+
;
3405+
%num.biased = add i32 %num, 4095
3406+
%num.masked = and i32 %num.biased, -4096
3407+
%_0 = icmp ne i32 %num.masked, %num
3408+
ret i1 %_0
3409+
}
3410+
3411+
define i1 @val_is_aligend_const_mismatch(i32 %num) {
3412+
; CHECK-LABEL: @val_is_aligend_const_mismatch(
3413+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3414+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4095
3415+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
3416+
; CHECK-NEXT: ret i1 [[_0]]
3417+
;
3418+
%num.biased = add i32 %num, 4095
3419+
%num.masked = and i32 %num.biased, -4095
3420+
%_0 = icmp ne i32 %num.masked, %num
3421+
ret i1 %_0
3422+
}
3423+
3424+
define i1 @val_is_aligend_const_mismatch1(i32 %num) {
3425+
; CHECK-LABEL: @val_is_aligend_const_mismatch1(
3426+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
3427+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
3428+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
3429+
; CHECK-NEXT: ret i1 [[_0]]
3430+
;
3431+
%num.biased = add i32 %num, 4096
3432+
%num.masked = and i32 %num.biased, -4096
3433+
%_0 = icmp ne i32 %num.masked, %num
3434+
ret i1 %_0
3435+
}
3436+
3437+
define i1 @val_is_aligend_pred_mismatch(i32 %num) {
3438+
; CHECK-LABEL: @val_is_aligend_pred_mismatch(
3439+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
3440+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
3441+
; CHECK-NEXT: [[_0:%.*]] = icmp sge i32 [[NUM_MASKED]], [[NUM]]
3442+
; CHECK-NEXT: ret i1 [[_0]]
3443+
;
3444+
%num.biased = add i32 %num, 4096
3445+
%num.masked = and i32 %num.biased, -4096
3446+
%_0 = icmp sge i32 %num.masked, %num
3447+
ret i1 %_0
3448+
}

0 commit comments

Comments
 (0)