Conversation

@artagnon
Contributor

@artagnon artagnon commented Sep 24, 2024

There is a stray break statement in the recurrence-handling code in computeKnownBitsFromOperator that seems to be unintended. Strip this statement so that we have the opportunity to go through the rest of the phi-handling code and refine KnownBits further.

@artagnon
Contributor Author

I had more time to think about this and to check reduced cases in Alive2. The diff seems to be correct in all cases, and I think the break was unintended. Marking as ready for review.

@artagnon artagnon marked this pull request as ready for review September 25, 2024 12:52
@artagnon artagnon requested a review from nikic as a code owner September 25, 2024 12:52
@llvmbot llvmbot added llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Sep 25, 2024
@llvmbot
Member

llvmbot commented Sep 25, 2024

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

There is a stray break statement in the recurrence-handling code in computeKnownBitsFromOperator that seems to be unintended. Strip this statement so that we have the opportunity to go through the rest of the phi-handling code and refine KnownBits further.


Full diff: https://github.com/llvm/llvm-project/pull/109794.diff

4 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (-2)
  • (modified) llvm/test/Transforms/InstCombine/cast_phi.ll (+3-3)
  • (modified) llvm/test/Transforms/InstCombine/known-non-zero.ll (+3-4)
  • (modified) llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll (+16-16)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 56eb3f99b39d2c..04ab1ec6a21c83 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1515,8 +1515,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
                    Known3.isNonNegative())
             Known.makeNonNegative();
         }
-
-        break;
       }
     }
 
diff --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll
index 7dfe60539138d6..99da3ac1c7c810 100644
--- a/llvm/test/Transforms/InstCombine/cast_phi.ll
+++ b/llvm/test/Transforms/InstCombine/cast_phi.ll
@@ -29,7 +29,7 @@ define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi float [ [[TMP11:%.*]], [[DOTBB12]] ], [ [[CONV_I]], [[DOTBB2]] ]
 ; CHECK-NEXT:    [[I12_06:%.*]] = phi i32 [ [[SUB:%.*]], [[DOTBB12]] ], [ [[INUMSTEPS]], [[DOTBB2]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[I12_06]], [[BASE:%.*]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[I12_06]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[I12_06]], 1
 ; CHECK-NEXT:    [[CONV_I9:%.*]] = sext i32 [[ADD]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLA]], i64 0, i64 [[CONV_I9]]
 ; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds [258 x float], ptr [[CALLB]], i64 0, i64 [[CONV_I9]]
@@ -70,8 +70,8 @@ define void @MainKernel(i32 %iNumSteps, i32 %tid, i32 %base) {
 ; CHECK-NEXT:    store float [[TMP10]], ptr [[ARRAYIDX6]], align 4
 ; CHECK-NEXT:    br label [[DOTBB12]]
 ; CHECK:       .bb12:
-; CHECK-NEXT:    [[SUB]] = add i32 [[I12_06]], -4
-; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[SUB]], 0
+; CHECK-NEXT:    [[SUB]] = add nsw i32 [[I12_06]], -4
+; CHECK-NEXT:    [[CMP13:%.*]] = icmp sgt i32 [[I12_06]], 4
 ; CHECK-NEXT:    br i1 [[CMP13]], label [[DOTBB3]], label [[DOTBB8]]
 ;
   %callA = alloca [258 x float], align 4
diff --git a/llvm/test/Transforms/InstCombine/known-non-zero.ll b/llvm/test/Transforms/InstCombine/known-non-zero.ll
index b77c04eb81475a..7a2a8626379614 100644
--- a/llvm/test/Transforms/InstCombine/known-non-zero.ll
+++ b/llvm/test/Transforms/InstCombine/known-non-zero.ll
@@ -102,12 +102,11 @@ define void @D60846_miscompile(ptr %p) {
 ; CHECK-NEXT:    [[IS_ZERO:%.*]] = icmp eq i16 [[I]], 0
 ; CHECK-NEXT:    br i1 [[IS_ZERO]], label [[COMMON]], label [[NON_ZERO:%.*]]
 ; CHECK:       non_zero:
-; CHECK-NEXT:    [[IS_ONE:%.*]] = icmp eq i16 [[I]], 1
-; CHECK-NEXT:    store i1 [[IS_ONE]], ptr [[P:%.*]], align 1
+; CHECK-NEXT:    store i1 true, ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[COMMON]]
 ; CHECK:       common:
-; CHECK-NEXT:    [[I_INC]] = add i16 [[I]], 1
-; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i16 [[I_INC]], 2
+; CHECK-NEXT:    [[I_INC]] = add nuw nsw i16 [[I]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i16 [[I]], 0
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll b/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
index d431055f0c21b7..4123bc5557899a 100644
--- a/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
+++ b/llvm/test/Transforms/InstCombine/remove-loop-phi-multiply-by-zero.ll
@@ -6,8 +6,8 @@ define double @test_mul_fast_flags(ptr %arr_d) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double 0.000000e+00
@@ -36,8 +36,8 @@ define double @test_nsz_nnan_flags_enabled(ptr %arr_d) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_02:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double 0.000000e+00
@@ -70,8 +70,8 @@ define double @test_nnan_flag_enabled(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul nnan double [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double [[MUL]]
@@ -104,8 +104,8 @@ define double @test_ninf_flag_enabled(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul ninf double [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double [[MUL]]
@@ -138,8 +138,8 @@ define double @test_nsz_flag_enabled(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul nsz double [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double [[MUL]]
@@ -172,8 +172,8 @@ define double @test_phi_initalise_to_non_zero(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], ptr [[ARR_D:%.*]], i64 0, i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[MUL]] = fmul fast double [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret double [[MUL]]
@@ -284,8 +284,8 @@ define i32 @test_int_phi_operands(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul nsw i32 [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret i32 [[MUL]]
@@ -318,8 +318,8 @@ define i32 @test_int_phi_operands_initalise_to_non_zero(ptr %arr_d) {
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARR_D:%.*]], i64 [[I_02]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL]] = mul i32 [[F_PROD_01]], [[TMP0]]
-; CHECK-NEXT:    [[INC]] = add i64 [[I_02]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[INC]], 1000
+; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_02]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[I_02]], 999
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[END:%.*]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret i32 [[MUL]]
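The shape of the change can be illustrated with a small control-flow sketch. Everything below is hypothetical, simplified code (an 8-bit KnownBits stand-in with made-up analysis results), not the real LLVM implementation; it only shows why the fall-through is safe: the generic phi path is guarded by Known.isUnknown(), so it fires only when the recurrence analysis learned nothing.

```cpp
#include <cstdint>

// Minimal stand-in for llvm::KnownBits over an 8-bit value. Illustrative
// sketch only; the real class lives in llvm/Support/KnownBits.h.
struct KnownBits {
  uint8_t Zero = 0; // bit set => that bit of the value is known to be 0
  uint8_t One = 0;  // bit set => that bit of the value is known to be 1
  bool isUnknown() const { return Zero == 0 && One == 0; }
};

// Hypothetical recurrence analysis: derives facts only when it can reason
// about the recurrence's operands (modeled here by a flag).
KnownBits analyzeRecurrence(bool derivedFacts) {
  KnownBits K;
  if (derivedFacts)
    K.Zero = 0x80; // e.g. proved the sign bit clear (non-negative)
  return K;
}

// Hypothetical generic phi analysis: can use branch conditions on the
// predecessor edges, which the recurrence path does not look at.
KnownBits analyzeIncomingEdges() {
  KnownBits K;
  K.Zero = 0xF0; // e.g. proved the value is < 16 on every incoming edge
  return K;
}

// earlyExit models the removed `break;`, which fired whenever the phi
// matched a recurrence shape -- even when no bits had been derived.
KnownBits computeKnownBitsFromPhi(bool derivedFacts, bool earlyExit) {
  KnownBits Known = analyzeRecurrence(derivedFacts);
  if (earlyExit)
    return Known; // old behavior: skip the generic refinement below
  // Models the `Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()`
  // guard: the generic path runs only if we still know nothing.
  if (Known.isUnknown())
    Known = analyzeIncomingEdges();
  return Known;
}
```

With earlyExit set (the old break), a recurrence the matcher could not reason about is left fully unknown; without it, the generic edge analysis gets a chance to run, and a successful recurrence analysis is untouched either way.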

@artagnon
Contributor Author

artagnon commented Oct 1, 2024

Gentle ping.

@artagnon artagnon requested a review from dtcxzyw October 1, 2024 09:50
Contributor

@nikic nikic left a comment

LGTM

@artagnon artagnon merged commit d432e22 into llvm:main Oct 2, 2024
4 checks passed
@artagnon artagnon deleted the vt-recur-break-fix branch October 2, 2024 10:18
@nikic
Contributor

nikic commented Oct 2, 2024

@artagnon
Contributor Author

artagnon commented Oct 2, 2024

Hm, perhaps we can get @dtcxzyw to run the opt benchmarks and see whether the compile-time cost outweighs the extra optimization?

@artagnon
Contributor Author

artagnon commented Oct 2, 2024

What strikes me as strange is that the break seems to have been completely unintentional: I dug through the history and couldn't find any evidence that it was there for a reason.

@nikic
Contributor

nikic commented Oct 2, 2024

What strikes me as strange is that the break seems to have been completely unintentional: I dug through the history and couldn't find any evidence that it was there for a reason.

This kind of break can be there if we expect a code path to be strictly better. Generally speaking, if we can analyze something as a recurrence, the results we get should always be better than a naive analysis, so falling through to the naive analysis is unnecessary.

It's just that, in the exact way this was done here, that is not actually true, because we miss the special handling for conditions on the predecessor branch.
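The "conditions on the predecessor branch" refinement mentioned here is the kind of fact the generic phi path can derive and the recurrence path cannot. A toy sketch of one such fact (hypothetical helper over 8-bit values, handling power-of-two bounds only — not the real LLVM code):

```cpp
#include <cstdint>

// Sketch: derive known-zero bits of x from a dominating branch condition
// `x u< Bound`, for power-of-two bounds. This mimics the style of
// predecessor-branch reasoning that the stray break skipped over.
uint8_t knownZeroFromULT(uint8_t Bound) {
  // Only power-of-two bounds are handled in this sketch.
  if (Bound == 0 || (Bound & (Bound - 1)) != 0)
    return 0;
  // x u< Bound pins every bit at or above the bound's single set bit to 0,
  // e.g. x u< 16 means the top nibble of x is zero.
  return (uint8_t)~(Bound - 1);
}
```

For example, if an incoming edge is only taken under `x u< 16`, the top four bits of x are known zero on that edge — a fact the recurrence analysis never looks at, which is why breaking before the generic path could lose information.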

@artagnon
Contributor Author

artagnon commented Oct 2, 2024

This kind of break can be there if we expect a code path to be strictly better. Generally speaking, if we can analyze something as a recurrence, the results we get should always be better than a naive analysis, so falling through to the naive analysis is unnecessary.

It's just that, in the exact way this was done here, that is not actually true, because we miss the special handling for conditions on the predecessor branch.

This is a good point, but then I would have expected the break to apply to the shift recurrences as well. Can we rearrange the code so that the special handling for conditions on the predecessor branch isn't missed, while still squelching the compile-time regression?

Sterling-Augustine pushed a commit to Sterling-Augustine/llvm-project that referenced this pull request Oct 3, 2024
There is a stray break statement in the recurrence-handling code in
computeKnownBitsFromOperator, that seems to be unintended. Strip this
statement so that we have the opportunity to go through the rest of
phi-handling code, and refine KnownBits further.
@artagnon
Contributor Author

artagnon commented Oct 3, 2024

I investigated this morning, and I see that the code is guarded by:

if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown())

Doesn't this mean that any inserted break is redundant, and that we were previously breaking in more cases than necessary? Otherwise, how can Known still be unknown?
