Skip to content

Commit 922ab66

Browse files
authored
[InstCombine] Drop nowrap flags in foldBitCeil (#125817)
For convenience, this patch drops `nsw` for `sub` as well. It also allows this fold with `ctlz_zero_undef` (i.e. `ctlz` whose `is_zero_poison` argument is true). Alive2: https://alive2.llvm.org/ce/z/VmvqSt
1 parent ee76bda commit 922ab66

File tree

2 files changed

+55
-10
lines changed

2 files changed

+55
-10
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3432,7 +3432,7 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
34323432
static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
34333433
const APInt *Cond1, Value *CtlzOp,
34343434
unsigned BitWidth,
3435-
bool &ShouldDropNUW) {
3435+
bool &ShouldDropNoWrap) {
34363436
// The challenge in recognizing std::bit_ceil(X) is that the operand is used
34373437
// for the CTLZ proper and select condition, each possibly with some
34383438
// operation like add and sub.
@@ -3455,7 +3455,7 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
34553455
ConstantRange CR = ConstantRange::makeExactICmpRegion(
34563456
CmpInst::getInversePredicate(Pred), *Cond1);
34573457

3458-
ShouldDropNUW = false;
3458+
ShouldDropNoWrap = false;
34593459

34603460
// Match the operation that's used to compute CtlzOp from CommonAncestor. If
34613461
// CtlzOp == CommonAncestor, return true as no operation is needed. If a
@@ -3466,11 +3466,12 @@ static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
34663466
if (CtlzOp == CommonAncestor)
34673467
return true;
34683468
if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {
3469+
ShouldDropNoWrap = true;
34693470
CR = CR.add(*C);
34703471
return true;
34713472
}
34723473
if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
3473-
ShouldDropNUW = true;
3474+
ShouldDropNoWrap = true;
34743475
CR = ConstantRange(*C).sub(CR);
34753476
return true;
34763477
}
@@ -3541,19 +3542,21 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
35413542
Pred = CmpInst::getInversePredicate(Pred);
35423543
}
35433544

3544-
bool ShouldDropNUW;
3545+
bool ShouldDropNoWrap;
35453546

35463547
if (!match(FalseVal, m_One()) ||
35473548
!match(TrueVal,
35483549
m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
35493550
m_Value(Ctlz)))))) ||
3550-
!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
3551+
!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Value())) ||
35513552
!isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth,
3552-
ShouldDropNUW))
3553+
ShouldDropNoWrap))
35533554
return nullptr;
35543555

3555-
if (ShouldDropNUW)
3556+
if (ShouldDropNoWrap) {
35563557
cast<Instruction>(CtlzOp)->setHasNoUnsignedWrap(false);
3558+
cast<Instruction>(CtlzOp)->setHasNoSignedWrap(false);
3559+
}
35573560

35583561
// Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
35593562
// single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
@@ -3562,6 +3565,8 @@ static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder,
35623565

35633566
// Drop range attributes and re-infer them in the next iteration.
35643567
cast<Instruction>(Ctlz)->dropPoisonGeneratingAnnotations();
3568+
// Set is_zero_poison to false and re-infer it in the next iteration.
3569+
cast<Instruction>(Ctlz)->setOperand(1, Builder.getFalse());
35653570
IC.addToWorklist(cast<Instruction>(Ctlz));
35663571
Value *Neg = Builder.CreateNeg(Ctlz);
35673572
Value *Masked =

llvm/test/Transforms/InstCombine/bit_ceil.ll

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,9 +302,9 @@ define i32 @pr91691(i32 %0) {
302302
ret i32 %7
303303
}
304304

305-
define i32 @pr91691_keep_nsw(i32 %0) {
306-
; CHECK-LABEL: @pr91691_keep_nsw(
307-
; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 -2, [[TMP0:%.*]]
305+
define i32 @pr91691_drop_nsw(i32 %0) {
306+
; CHECK-LABEL: @pr91691_drop_nsw(
307+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 -2, [[TMP0:%.*]]
308308
; CHECK-NEXT: [[TMP3:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP2]], i1 false)
309309
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i32 0, [[TMP3]]
310310
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 31
@@ -337,6 +337,46 @@ define i32 @test_drop_range_attr(i32 %x) {
337337
ret i32 %sel
338338
}
339339

340+
define i32 @bit_ceil_plus_nsw(i32 %x) {
341+
; CHECK-LABEL: @bit_ceil_plus_nsw(
342+
; CHECK-NEXT: entry:
343+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1
344+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
345+
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
346+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31
347+
; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
348+
; CHECK-NEXT: ret i32 [[SEL]]
349+
;
350+
entry:
351+
%sub = add nsw i32 %x, 1
352+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
353+
%sub2 = sub nuw nsw i32 32, %ctlz
354+
%shl = shl nuw i32 1, %sub2
355+
%ult = icmp ult i32 %x, 2147483647
356+
%sel = select i1 %ult, i32 %shl, i32 1
357+
ret i32 %sel
358+
}
359+
360+
define i32 @bit_ceil_plus_nuw(i32 %x) {
361+
; CHECK-LABEL: @bit_ceil_plus_nuw(
362+
; CHECK-NEXT: entry:
363+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1
364+
; CHECK-NEXT: [[CTLZ:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false)
365+
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
366+
; CHECK-NEXT: [[SUB2:%.*]] = and i32 [[TMP0]], 31
367+
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]]
368+
; CHECK-NEXT: ret i32 [[SHL]]
369+
;
370+
entry:
371+
%sub = add nuw i32 %x, 1
372+
%ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
373+
%sub2 = sub nuw nsw i32 32, %ctlz
374+
%shl = shl nuw i32 1, %sub2
375+
%ult = icmp ult i32 %x, 2147483647
376+
%sel = select i1 %ult, i32 %shl, i32 1
377+
ret i32 %sel
378+
}
379+
340380
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
341381
declare i64 @llvm.ctlz.i64(i64, i1 immarg)
342382
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

0 commit comments

Comments
 (0)