Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,9 +510,12 @@ class SCEVUMinExpr : public SCEVMinMaxExpr {

/// This node is the base class for sequential/in-order min/max selections.
/// Note that their fundamental difference from SCEVMinMaxExpr's is that they
/// are early-returning upon reaching saturation point.
/// I.e. given `0 umin_seq poison`, the result will be `0`,
/// while the result of `0 umin poison` is `poison`.
/// are early-returning
/// * upon reaching saturation point
/// I.e. given `0 umin_seq poison`, the result will be `0`,
/// while the result of `0 umin poison` is `poison`.
/// * if any operand may trigger UB, e.g. if there is a UDiv operand that may
/// divide by 0.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These aren't really two separate cases. The "early return" happens "upon reaching the saturation point" in both cases. The clarification here is just that "early return" is in the sense that the RHS will not be executed at all (even if it has UB), rather than only that its value will not be used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, should be updated in #110824

class SCEVSequentialMinMaxExpr : public SCEVNAryExpr {
friend class ScalarEvolution;

Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// "expanded" form.
bool LSRMode;

/// When true, rewrite any divisors of UDiv expressions that may be 0 to
/// umax(Divisor, 1) to avoid introducing UB. If the divisor may be poison,
/// freeze it first.
bool SafeUDivMode = false;

typedef IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> BuilderType;
BuilderType Builder;

Expand Down Expand Up @@ -419,6 +424,9 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
BasicBlock::iterator findInsertPointAfter(Instruction *I,
Instruction *MustDominate) const;

static const SCEV *rewriteExpressionToRemoveUB(const SCEV *BTC, Loop *L,
ScalarEvolution &SE);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused prototype.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dropped, thanks!


private:
LLVMContext &getContext() const { return SE.getContext(); }

Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4304,6 +4304,16 @@ ScalarEvolution::getSequentialMinMaxExpr(SCEVTypes Kind,
}

for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
bool MayBeUB = SCEVExprContains(Ops[i], [this](const SCEV *S) {
auto *UDiv = dyn_cast<SCEVUDivExpr>(S);
// The UDiv may be UB if the divisor is poison or zero. Unless the divisor
// is a non-zero constant, we have to assume the UDiv may be UB.
return UDiv && (!isa<SCEVConstant>(UDiv->getOperand(1)) ||
!isKnownNonZero(UDiv->getOperand(1)));
});

if (MayBeUB)
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should also change the doc comment on SCEVSequentialMinMaxExpr for the new semantics introduced here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added thanks!

// We can replace %x umin_seq %y with %x umin %y if either:
// * %y being poison implies %x is also poison.
// * %x cannot be the saturating value (e.g. zero for umin).
Expand Down
14 changes: 13 additions & 1 deletion llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,15 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}

Value *RHS = expand(S->getRHS());
const SCEV *RHSExpr = S->getRHS();
Value *RHS = expand(RHSExpr);
if (SafeUDivMode &&
(!isa<SCEVConstant>(RHSExpr) || SE.isKnownNonZero(RHSExpr))) {
if (!isa<SCEVConstant>(S->getRHS()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can now use the isGuaranteedNotToBePoison check instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, updated. This removes the freezes in a few cases, but that should be fine: in those cases the function would always trigger UB, so expanding the UDiv before the loop simply triggers UB earlier (e.g. multi_exit_4_exit_count_with_udiv_by_value_in_latch); see https://alive2.llvm.org/ce/z/anDrW9.

Added additional variants where this is not the case (multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop, multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute), and there's also multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds.

RHS = Builder.CreateFreeze(RHS);
RHS = Builder.CreateIntrinsic(RHS->getType(), Intrinsic::umax,
{RHS, ConstantInt::get(RHS->getType(), 1)});
}
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
Expand Down Expand Up @@ -1371,11 +1379,14 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
Intrinsic::ID IntrinID, Twine Name,
bool IsSequential) {
bool PrevSafeMode = SafeUDivMode;
SafeUDivMode |= IsSequential;
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
if (IsSequential)
LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
Value *RHS = expand(S->getOperand(i));
if (IsSequential && i != 0)
RHS = Builder.CreateFreeze(RHS);
Expand All @@ -1390,6 +1401,7 @@ Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
}
LHS = Sel;
}
SafeUDivMode = PrevSafeMode;
return LHS;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
; The UDiv in the latch may never be executed. The backedge-taken-count
; expressions must account for the fact that evaluating the UDiv
; unconditionally may trigger UB.
; FIXME: umin_seq should be used instead of umin for BTCs.
define i64 @multi_exit_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N) {
; CHECK-LABEL: 'multi_exit_exit_count_with_udiv_by_value_in_latch'
; CHECK-NEXT: Determining loop execution counts for: @multi_exit_exit_count_with_udiv_by_value_in_latch
; CHECK-NEXT: Loop %loop.header: <multiple exits> backedge-taken count is ((42 /u %N) umin (0 smax %N))
; CHECK-NEXT: Loop %loop.header: <multiple exits> backedge-taken count is ((0 smax %N) umin_seq (42 /u %N))
; CHECK-NEXT: exit count for loop.header: (0 smax %N)
; CHECK-NEXT: exit count for loop.latch: (42 /u %N)
; CHECK-NEXT: Loop %loop.header: constant max backedge-taken count is i64 42
; CHECK-NEXT: Loop %loop.header: symbolic max backedge-taken count is ((42 /u %N) umin (0 smax %N))
; CHECK-NEXT: Loop %loop.header: symbolic max backedge-taken count is ((0 smax %N) umin_seq (42 /u %N))
; CHECK-NEXT: symbolic max exit count for loop.header: (0 smax %N)
; CHECK-NEXT: symbolic max exit count for loop.latch: (42 /u %N)
; CHECK-NEXT: Loop %loop.header: Trip multiple is 1
Expand Down Expand Up @@ -41,7 +40,6 @@ exit:
; The UDiv in the latch may never be executed. The backedge-taken-count
; expressions must account for the fact that evaluating the UDiv
; unconditionally may trigger UB.
; FIXME: umin_seq should be used instead of umin for BTCs.
define i64 @multi_exit_exit_count_with_udiv_by_value_in_latch_different_bounds(ptr %dst, i64 %N, i64 %M) {
; CHECK-LABEL: 'multi_exit_exit_count_with_udiv_by_value_in_latch_different_bounds'
; CHECK-NEXT: Determining loop execution counts for: @multi_exit_exit_count_with_udiv_by_value_in_latch_different_bounds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -463,9 +463,12 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP9:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP10]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 [[SMAX]])
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Expand Down Expand Up @@ -529,7 +532,9 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[M]]
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[M]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]])
Expand Down Expand Up @@ -598,9 +603,12 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst,
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FR_N:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[FR_N]]
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP0]])
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[FR_N]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]])
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Expand Down Expand Up @@ -786,12 +794,15 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP11:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP12]]
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[N]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 42, [[TMP1]]
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[SMAX1]])
; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[SMAX1]]
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX2]])
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Expand Down Expand Up @@ -1004,9 +1015,12 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 %
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 42, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP9]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[TMP0]])
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]])
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[UMIN]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP1]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
Expand Down Expand Up @@ -1068,6 +1082,46 @@ exit:
ret i64 %p
}

; The UDiv in the latch divides by the constant 0, so executing the latch is
; UB. The loop has two exits: the header exit compares %iv against %N and the
; latch exit compares %iv against the UDiv result. The CHECK lines below expect
; the loop to be left in its original scalar form (entry branches straight to
; loop.header).
define i64 @multi_exit_exit_count_with_udiv_by_0_in_latch(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @multi_exit_exit_count_with_udiv_by_0_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
; CHECK-NEXT: [[C_0:%.*]] = icmp slt i64 [[IV]], [[N]]
; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[EXIT:%.*]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[D:%.*]] = udiv i64 42, 0
; CHECK-NEXT: [[C_1:%.*]] = icmp slt i64 [[IV]], [[D]]
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_HEADER]], label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[P:%.*]] = phi i64 [ 1, [[LOOP_HEADER]] ], [ 0, [[LOOP_LATCH]] ]
; CHECK-NEXT: ret i64 [[P]]
;
entry:
br label %loop.header

; First exit: stores 1 to %dst[%iv], then leaves the loop once %iv >= %N
; (signed compare).
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep = getelementptr inbounds i32, ptr %dst, i64 %iv
store i32 1, ptr %gep
%c.0 = icmp slt i64 %iv, %N
br i1 %c.0, label %loop.latch, label %exit

; Second exit: the bound is a UDiv with a constant zero divisor.
loop.latch:
%iv.next = add i64 %iv, 1
%d = udiv i64 42, 0
%c.1 = icmp slt i64 %iv, %d
br i1 %c.1, label %loop.header, label %exit

; Returns 1 when leaving via the header exit and 0 when leaving via the latch.
exit:
%p = phi i64 [ 1, %loop.header ], [ 0, %loop.latch]
ret i64 %p
}

;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
Expand Down