Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
/// "expanded" form.
bool LSRMode;

/// When true, rewrite any divisors of UDiv expressions that may be 0 to
/// umax(Divisor, 1) to avoid introducing UB. If the divisor may be poison,
/// freeze it first.
bool SafeUDivMode = false;

typedef IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> BuilderType;
BuilderType Builder;

Expand Down
14 changes: 13 additions & 1 deletion llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,15 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
}

Value *RHS = expand(S->getRHS());
const SCEV *RHSExpr = S->getRHS();
Value *RHS = expand(RHSExpr);
if (SafeUDivMode) {
if (!ScalarEvolution::isGuaranteedNotToBePoison(RHSExpr))
RHS = Builder.CreateFreeze(RHS);
if (!SE.isKnownNonZero(RHSExpr))
RHS = Builder.CreateIntrinsic(RHS->getType(), Intrinsic::umax,
{RHS, ConstantInt::get(RHS->getType(), 1)});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic still doesn't look quite right to me. If you have isKnownNonZero but not isGuaranteedNotToBePoison, then you still need the umax, because you no longer know that it's non-zero after the freeze.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

}
return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
Expand Down Expand Up @@ -1376,11 +1384,14 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
Intrinsic::ID IntrinID, Twine Name,
bool IsSequential) {
bool PrevSafeMode = SafeUDivMode;
SafeUDivMode |= IsSequential;
Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
Type *Ty = LHS->getType();
if (IsSequential)
LHS = Builder.CreateFreeze(LHS);
for (int i = S->getNumOperands() - 2; i >= 0; --i) {
SafeUDivMode = (IsSequential && i != 0) || PrevSafeMode;
Value *RHS = expand(S->getOperand(i));
if (IsSequential && i != 0)
RHS = Builder.CreateFreeze(RHS);
Expand All @@ -1395,6 +1406,7 @@ Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
}
LHS = Sel;
}
SafeUDivMode = PrevSafeMode;
return LHS;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,12 +459,12 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP10]]
; CHECK-NEXT: [[TMP8:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 [[SMAX]])
Expand Down Expand Up @@ -529,13 +529,14 @@ exit:

declare void @foo()

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]])
Expand Down Expand Up @@ -599,14 +600,15 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute(ptr %dst, i64 %N, i1 %c) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i1 [[C:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.header.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]])
Expand Down Expand Up @@ -672,12 +674,13 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds(ptr %dst, i64 %N, i64 %M) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[M]]
; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[M]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]])
Expand Down Expand Up @@ -740,13 +743,13 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, i64 %N) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think SCEV is actually computing the trip count wrong here. It currently produces ((42 /u %fr.N) umin (0 smax %N2))... but I think it should actually be ((0 smax %N2) umin_seq (42 /u %fr.N)). And that should be a well-defined if %N2 is zero, even if we would divide by zero without the umin_seq. (I don't think anyone was thinking about divide-by-zero when https://reviews.llvm.org/D116766 was originally proposed, but it seems like a natural extension.)

Under those semantics, we shouldn't need to modify VPlan to examine the exits. SCEVExpander itself should enable SafeUDivMode when it evaluates operands of umin_seq after the first.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(The relevant code on the SCEV side is ScalarEvolution::getSequentialMinMaxExpr; it "optimizes" umin_seq to umin.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the proposal here is to change the semantics of umin_seq from a select to a conditional branch, so it not only prevents poison propagation but also UB propagation?

Does that really cover all cases? What if we have one exit with a udiv exit count, but also a prior abnormal exit? Maybe not relevant to vectorization, but thinking more generally here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure there's really a useful distinction between an abnormal exit inside the loop, vs. an abnormal exit in the loop preheader. Either way, if you want to hoist the trip count past such an operation, you need to take additional measures beyond what SCEV normally provides. You need safe-udiv for all udiv operations, and if you want to use the value for anything, you need to freeze it.

For simple vectorization, we should be able to assume the loop exits normally, and we want to use that information to avoid unnecessary freeze operations. So we need to distinguish which freeze/safe-div operations are necessary. The way we currently figure out which operands we need to freeze is via umin_seq; I think it makes sense to figure out which operands need safe-div the same way.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the PR to use umin_seq in the cases as suggested I think; the main additional change is to forbid simplifying A umin_seq B -> A umin B, if B may trigger UB (at the moment just SCEVUDivExpr?). This could be split off separately + SCEV printing test. Just want to make sure this is what you had in mind before doing so.

(There are some redundant freezes of the UDiv result which we could get rid of)

Looking at this again, I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing; as the expanded expression is used for a branch, if %N is poison, the branch on poison will also trigger UB, just slightly later. Granted at the SCEV & expander level, we don't really know if that will be the case. @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bound uses 2 different bounds for the exits, so here the freeze of the divisor is definitely needed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is what I was thinking, roughly.

I am not sure if the divisor being poison in @multi_exit_4_exit_count_with_udiv_by_value_in_latch actually needs freezing

We could probably refine this in SCEVExpander if we end up generating a bunch of freeze in inconvenient places (basically, make a map of SCEVUnknowns which are used without freezing them, and then don't freeze if we see them elsewhere).

; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[FR_N:%.*]] = freeze i64 [[N]]
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[FR_N]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[FR_N]], i64 1)
; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP2]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]])
Expand Down Expand Up @@ -931,12 +934,12 @@ exit:
ret void
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP12]]
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[N]], [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 42, [[TMP1]]
; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0)
Expand Down Expand Up @@ -1002,7 +1005,6 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 %N) {
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
Expand Down Expand Up @@ -1156,7 +1158,8 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 %
; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 42, [[N]]
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 42, [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP9]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]])
Expand Down Expand Up @@ -1262,13 +1265,13 @@ exit:
ret i64 %p
}

; FIXME: currently the expansion of the loop bounds may introduce UB through the division.
define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_divisor_non_zero_may_be_poison(ptr %dst, i64 %N, i64 %M) {
; CHECK-LABEL: define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_divisor_non_zero_may_be_poison(
; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[M_1:%.*]] = call i64 @llvm.umax.i64(i64 [[M]], i64 1)
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[M_1]]
; CHECK-NEXT: [[TMP9:%.*]] = freeze i64 [[M_1]]
; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP9]]
; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]]
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SMAX]])
Expand Down