-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SCEVExpander] Expand UDiv avoiding UB when in seq_min/max. #92177
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bff4e5a
16a4b00
32833a7
b0d098c
d6d8655
4ace524
70f2ef7
fa12ea4
5b1a789
c2dc23d
1468d8b
c12b25c
5f31e85
9c4b76d
b699ddd
5469289
d5c8931
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -459,12 +459,12 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP10]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = freeze i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP8]], i64 [[SMAX]]) | ||
|
|
@@ -529,13 +529,14 @@ exit: | |
|
|
||
| declare void @foo() | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop(ptr %dst, i64 %N) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: call void @foo() | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[N]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]]) | ||
|
|
@@ -599,14 +600,15 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute(ptr %dst, i64 %N, i1 %c) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_execute( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i1 [[C:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: br i1 [[C]], label [[LOOP_HEADER_PREHEADER:%.*]], label [[EXIT:%.*]] | ||
| ; CHECK: loop.header.preheader: | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[N]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]]) | ||
|
|
@@ -672,12 +674,13 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds(ptr %dst, i64 %N, i64 %M) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[M]] | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[M]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = freeze i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[SMAX]]) | ||
|
|
@@ -740,13 +743,13 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, i64 %N) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think SCEV is actually computing the trip count wrong here. It currently produces Under those semantics, we shouldn't need to modify VPlan to examine the exits. SCEVExpander itself should enable SafeUDivMode when it evaluates operands of umin_seq after the first.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (The relevant code on the SCEV side is ScalarEvolution::getSequentialMinMaxExpr; it "optimizes" umin_seq to umin.)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the proposal here is to change the semantics of umin_seq from a select to a conditional branch, so it not only prevents poison propagation but also UB propagation? Does that really cover all cases? What if we have one exit with a udiv exit count, but also a prior abnormal exit? Maybe not relevant to vectorization, but thinking more generally here.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure there's really a useful distinction between an abnormal exit inside the loop, vs. an abnormal exit in the loop preheader. Either way, if you want to hoist the trip count past such an operation, you need to take additional measures beyond what SCEV normally provides. You need safe-udiv for all udiv operations, and if you want to use the value for anything, you need to freeze it. For simple vectorization, we should be able to assume the loop exits normally, and we want to use that information to avoid unnecessary freeze operations. So we need to distinguish which freeze/safe-div operations are necessary. The way we currently figure out which operands we need to freeze is via umin_seq; I think it makes sense to figure out which operands need safe-div the same way.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the PR to use umin_seq in the cases as suggested I think; the main additional change is to forbid simplifying (There are some redundant freezes of the UDiv result which we could get rid of) Looking at this again, I am not sure if the divisor being poison in
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is what I was thinking, roughly.
We could probably refine this in SCEVExpander if we end up generating a bunch of freeze in inconvenient places (basically, make a map of SCEVUnknowns which are used without freezing them, and then don't freeze if we see them elsewhere). |
||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[FR_N:%.*]] = freeze i64 [[N]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[FR_N]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 [[FR_N]], i64 1) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 42, [[TMP1]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP2]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]]) | ||
|
|
@@ -931,12 +934,12 @@ exit: | |
| ret void | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[N]] | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP12]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[N]], [[TMP0]] | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 42, [[TMP1]] | ||
| ; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0) | ||
|
|
@@ -1002,7 +1005,6 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 %N) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
|
|
@@ -1156,7 +1158,8 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 % | |
| ; CHECK-LABEL: define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 42, [[N]] | ||
| ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = udiv i64 42, [[TMP8]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = freeze i64 [[TMP9]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP10]], i64 [[SMAX]]) | ||
|
|
@@ -1262,13 +1265,14 @@ exit: | |
| ret i64 %p | ||
| } | ||
|
|
||
| ; FIXME: currently the expansion of the loop bounds may introduce UB through the division. | ||
| define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_divisor_non_zero_may_be_poison(ptr %dst, i64 %N, i64 %M) { | ||
| ; CHECK-LABEL: define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_divisor_non_zero_may_be_poison( | ||
| ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) { | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[M_1:%.*]] = call i64 @llvm.umax.i64(i64 [[M]], i64 1) | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[M_1]] | ||
| ; CHECK-NEXT: [[TMP9:%.*]] = freeze i64 [[M_1]] | ||
| ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 1) | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 42, [[TMP10]] | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) | ||
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[SMAX]]) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic still doesn't look quite right to me. If you have isKnownNonZero but not isGuaranteedNotToBePoison, then you still need the umax, because you no longer know that it's non-zero after the freeze.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!