Skip to content

Commit ee8f7a5

Browse files
InstCombine: improve optimizations for ceiling division with no overflow
fixes #142497
1 parent 857138b commit ee8f7a5

File tree

2 files changed

+201
-0
lines changed

2 files changed

+201
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,6 +1787,50 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
17871787
if (Instruction *Ashr = foldAddToAshr(I))
17881788
return Ashr;
17891789

1790+
// Ceiling division by power-of-2:
1791+
// (X >> log2(N)) + zext((X & (N-1)) != 0) --> (X + (N-1)) >> log2(N)
1792+
// This is valid only when the unsigned addition X + (N-1) cannot overflow.
1793+
{
1794+
Value *X = nullptr, *Cmp = nullptr;
1795+
const APInt *ShiftAmt = nullptr, *Mask = nullptr;
1796+
CmpPredicate Pred;
1797+
1798+
// Match: (X >> C) + zext((X & Mask) != 0)
1799+
// or: zext((X & Mask) != 0) + (X >> C)
1800+
Value *Op0 = I.getOperand(0);
1801+
Value *Op1 = I.getOperand(1);
1802+
1803+
// Try matching with shift on left, zext on right
1804+
bool Matched = false;
1805+
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
1806+
match(Op1, m_ZExt(m_Value(Cmp)))) {
1807+
Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
1808+
m_ZeroInt()));
1809+
} else if (match(Op1, m_LShr(m_Value(X), m_APInt(ShiftAmt))) &&
1810+
match(Op0, m_ZExt(m_Value(Cmp)))) {
1811+
Matched = match(Cmp, m_ICmp(Pred, m_And(m_Specific(X), m_APInt(Mask)),
1812+
m_ZeroInt()));
1813+
}
1814+
1815+
if (Matched &&
1816+
Pred == ICmpInst::ICMP_NE &&
1817+
ShiftAmt && ShiftAmt->uge(1) && ShiftAmt->ult(BitWidth) &&
1818+
Mask && *Mask == (APInt(BitWidth, 1) << *ShiftAmt) - 1) {
1819+
1820+
// Check that the unsigned addition X + Mask cannot overflow
1821+
Constant *MaskC = ConstantInt::get(X->getType(), *Mask);
1822+
bool WillNotOverflowUnsigned = willNotOverflowUnsignedAdd(X, MaskC, I);
1823+
1824+
if (WillNotOverflowUnsigned) {
1825+
// (X + Mask) >> ShiftAmt
1826+
bool WillNotOverflowSigned = willNotOverflowSignedAdd(X, MaskC, I);
1827+
Value *Add = Builder.CreateAdd(X, MaskC, "", WillNotOverflowUnsigned,
1828+
WillNotOverflowSigned);
1829+
return BinaryOperator::CreateLShr(Add, ConstantInt::get(X->getType(), *ShiftAmt));
1830+
}
1831+
}
1832+
}
1833+
17901834
// (~X) + (~Y) --> -2 - (X + Y)
17911835
{
17921836
// To ensure we can save instructions we need to ensure that we consume both

llvm/test/Transforms/InstCombine/add.ll

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4273,4 +4273,161 @@ define i32 @fold_zext_nneg_add_const_fail2(i8 %x) {
42734273
}
42744274

42754275
declare void @llvm.assume(i1)
4276+
declare i32 @llvm.ctlz.i32(i32, i1)
4277+
4278+
; Ceiling division by power-of-2: (x >> log2(N)) + ((x & (N-1)) != 0) -> (x + (N-1)) >> log2(N)
4279+
; This is only valid when the unsigned addition x + (N-1) cannot overflow
4280+
4281+
; Test with known range that prevents overflow
4282+
define noundef range(i32 0, 100) i32 @ceil_div_by_8_known_range(i32 noundef range(i32 0, 100) %x) {
4283+
; CHECK-LABEL: @ceil_div_by_8_known_range(
4284+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[X:%.*]], 7
4285+
; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4286+
; CHECK-NEXT: ret i32 [[R]]
4287+
;
4288+
%shr = lshr i32 %x, 3
4289+
%and = and i32 %x, 7
4290+
%cmp = icmp ne i32 %and, 0
4291+
%ext = zext i1 %cmp to i32
4292+
%r = add i32 %shr, %ext
4293+
ret i32 %r
4294+
}
4295+
4296+
; Test with the exact IR from the original testcase
4297+
define noundef range(i32 0, 6) i32 @ceil_div_from_clz(i32 noundef %v) {
4298+
; CHECK-LABEL: @ceil_div_from_clz(
4299+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[V:%.*]], i1 false)
4300+
; CHECK-NEXT: [[TMP1:%.*]] = sub nuw nsw i32 39, [[CTLZ]]
4301+
; CHECK-NEXT: [[R:%.*]] = lshr i32 [[TMP1]], 3
4302+
; CHECK-NEXT: ret i32 [[R]]
4303+
;
4304+
%ctlz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %v, i1 false)
4305+
%sub = sub nuw nsw i32 32, %ctlz
4306+
%shr = lshr i32 %sub, 3
4307+
%and = and i32 %sub, 7
4308+
%cmp = icmp ne i32 %and, 0
4309+
%ext = zext i1 %cmp to i32
4310+
%r = add nuw nsw i32 %shr, %ext
4311+
ret i32 %r
4312+
}
4313+
4314+
; Vector version with known range
4315+
define <2 x i32> @ceil_div_by_8_vec_range(<2 x i32> range(i32 0, 1000) %x) {
4316+
; CHECK-LABEL: @ceil_div_by_8_vec_range(
4317+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <2 x i32> [[X:%.*]], splat (i32 7)
4318+
; CHECK-NEXT: [[R:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3)
4319+
; CHECK-NEXT: ret <2 x i32> [[R]]
4320+
;
4321+
%shr = lshr <2 x i32> %x, <i32 3, i32 3>
4322+
%and = and <2 x i32> %x, <i32 7, i32 7>
4323+
%cmp = icmp ne <2 x i32> %and, <i32 0, i32 0>
4324+
%ext = zext <2 x i1> %cmp to <2 x i32>
4325+
%r = add <2 x i32> %shr, %ext
4326+
ret <2 x i32> %r
4327+
}
4328+
4329+
; Ceiling division by 16 with known range
4330+
define i16 @ceil_div_by_16_i16(i16 range(i16 0, 1000) %x) {
4331+
; CHECK-LABEL: @ceil_div_by_16_i16(
4332+
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X:%.*]], 15
4333+
; CHECK-NEXT: [[R:%.*]] = lshr i16 [[TMP1]], 4
4334+
; CHECK-NEXT: ret i16 [[R]]
4335+
;
4336+
%shr = lshr i16 %x, 4
4337+
%and = and i16 %x, 15
4338+
%cmp = icmp ne i16 %and, 0
4339+
%ext = zext i1 %cmp to i16
4340+
%r = add i16 %shr, %ext
4341+
ret i16 %r
4342+
}
4343+
4344+
; Negative test: no overflow guarantee - should NOT optimize
4345+
define i32 @ceil_div_by_8_no_overflow_info(i32 %x) {
4346+
; CHECK-LABEL: @ceil_div_by_8_no_overflow_info(
4347+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4348+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4349+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4350+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4351+
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4352+
; CHECK-NEXT: ret i32 [[R]]
4353+
;
4354+
%shr = lshr i32 %x, 3
4355+
%and = and i32 %x, 7
4356+
%cmp = icmp ne i32 %and, 0
4357+
%ext = zext i1 %cmp to i32
4358+
%r = add i32 %shr, %ext
4359+
ret i32 %r
4360+
}
4361+
4362+
; Negative test: nuw on final add doesn't help
4363+
define i32 @ceil_div_by_8_only_nuw_on_add(i32 %x) {
4364+
; CHECK-LABEL: @ceil_div_by_8_only_nuw_on_add(
4365+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4366+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4367+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4368+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4369+
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4370+
; CHECK-NEXT: ret i32 [[R]]
4371+
;
4372+
%shr = lshr i32 %x, 3
4373+
%and = and i32 %x, 7
4374+
%cmp = icmp ne i32 %and, 0
4375+
%ext = zext i1 %cmp to i32
4376+
%r = add nuw i32 %shr, %ext ; nuw here doesn't prove x+7 won't overflow
4377+
ret i32 %r
4378+
}
4379+
4380+
; Negative test: wrong mask
4381+
define i32 @ceil_div_wrong_mask(i32 range(i32 0, 100) %x) {
4382+
; CHECK-LABEL: @ceil_div_wrong_mask(
4383+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4384+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 6
4385+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4386+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4387+
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4388+
; CHECK-NEXT: ret i32 [[R]]
4389+
;
4390+
%shr = lshr i32 %x, 3
4391+
%and = and i32 %x, 6 ; Wrong mask: should be 7
4392+
%cmp = icmp ne i32 %and, 0
4393+
%ext = zext i1 %cmp to i32
4394+
%r = add i32 %shr, %ext
4395+
ret i32 %r
4396+
}
4397+
4398+
; Negative test: wrong shift amount
4399+
define i32 @ceil_div_wrong_shift(i32 range(i32 0, 100) %x) {
4400+
; CHECK-LABEL: @ceil_div_wrong_shift(
4401+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 4
4402+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4403+
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
4404+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4405+
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4406+
; CHECK-NEXT: ret i32 [[R]]
4407+
;
4408+
%shr = lshr i32 %x, 4 ; Shift by 4, but mask is 7 (should be 15)
4409+
%and = and i32 %x, 7
4410+
%cmp = icmp ne i32 %and, 0
4411+
%ext = zext i1 %cmp to i32
4412+
%r = add i32 %shr, %ext
4413+
ret i32 %r
4414+
}
4415+
4416+
; Negative test: wrong comparison
4417+
define i32 @ceil_div_wrong_cmp(i32 range(i32 0, 100) %x) {
4418+
; CHECK-LABEL: @ceil_div_wrong_cmp(
4419+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 3
4420+
; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], 7
4421+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
4422+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i32
4423+
; CHECK-NEXT: [[R:%.*]] = add nuw nsw i32 [[SHR]], [[EXT]]
4424+
; CHECK-NEXT: ret i32 [[R]]
4425+
;
4426+
%shr = lshr i32 %x, 3
4427+
%and = and i32 %x, 7
4428+
%cmp = icmp eq i32 %and, 0 ; Wrong: should be ne
4429+
%ext = zext i1 %cmp to i32
4430+
%r = add i32 %shr, %ext
4431+
ret i32 %r
4432+
}
42764433
declare void @fake_func(i32)

0 commit comments

Comments
 (0)