Skip to content

Commit 4c941bf

Browse files
committed
[SCEV] Try to push the op into ZExt: A + zext (-A + B) -> zext (B) (llvm#151227)
Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext (B), if trunc (A) + -A + B does not unsigned-wrap. The actual code supports ZExts with an arbitrary number of arguments, hence the getAddExpr in the return. This helps SCEV reasoning in some cases, commonly when adding an offset to a zero-extended SCEV that subtracts the same offset. Note that this is restricted to cases where we can fold away an operand of the inner Add. This restriction is needed to avoid bad interactions with the patterns used when forming ZExts, which try to push the ZExt into the add operands. https://alive2.llvm.org/ce/z/q7d303 PR: llvm#151227 (cherry picked from commit d74d841)
1 parent 9184074 commit 4c941bf

File tree

5 files changed

+73
-6
lines changed

5 files changed

+73
-6
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,6 +2682,21 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
26822682
return getAddExpr(NewOps, PreservedFlags);
26832683
}
26842684
}
2685+
2686+
// Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext
2687+
// (B), if trunc (A) + -A + B does not unsigned-wrap.
2688+
if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Ops[1])) {
2689+
const SCEV *B = ZExt->getOperand(0);
2690+
const SCEV *NarrowA = getTruncateExpr(A, B->getType());
2691+
if (isa<SCEVAddExpr>(B) &&
2692+
NarrowA == getNegativeSCEV(cast<SCEVAddExpr>(B)->getOperand(0)) &&
2693+
getZeroExtendExpr(NarrowA, ZExt->getType()) == A &&
2694+
hasFlags(StrengthenNoWrapFlags(this, scAddExpr, {NarrowA, B},
2695+
SCEV::FlagAnyWrap),
2696+
SCEV::FlagNUW)) {
2697+
return getZeroExtendExpr(getAddExpr(NarrowA, B), ZExt->getType());
2698+
}
2699+
}
26852700
}
26862701

26872702
// Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)

llvm/test/Analysis/ScalarEvolution/zext-add.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define void @test_push_constant_into_zext(ptr %dst, ptr %src, i32 %n, i64 %offse
1717
; CHECK-NEXT: %l = load i8, ptr %outer.ptr, align 1
1818
; CHECK-NEXT: --> %l U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant }
1919
; CHECK-NEXT: %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 %offset
20-
; CHECK-NEXT: --> {(%offset + %src),+,%offset}<%inner.loop> U: full-set S: full-set Exits: (((1 + (zext i32 (-1 + (1 smax %n))<nsw> to i64))<nuw><nsw> * %offset) + %src) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
20+
; CHECK-NEXT: --> {(%offset + %src),+,%offset}<%inner.loop> U: full-set S: full-set Exits: (((zext i32 (1 smax %n) to i64) * %offset) + %src) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
2121
; CHECK-NEXT: %iv.next = add i32 %iv, 1
2222
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%inner.loop> U: [1,-2147483648) S: [1,-2147483648) Exits: (1 smax %n) LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
2323
; CHECK-NEXT: Determining loop execution counts for: @test_push_constant_into_zext
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p indvars -S %s | FileCheck %s
3+
4+
target triple = "arm64-apple-macosx15.0.0"
5+
6+
declare i1 @cond()
7+
8+
define void @pred_mip_12(ptr %dst, ptr %src, i32 %n, i64 %offset) {
9+
; CHECK-LABEL: define void @pred_mip_12(
10+
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]], i64 [[OFFSET:%.*]]) {
11+
; CHECK-NEXT: [[ENTRY:.*]]:
12+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1)
13+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[SMAX]] to i64
14+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFFSET]], [[TMP0]]
15+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
16+
; CHECK-NEXT: br label %[[OUTER_LOOP:.*]]
17+
; CHECK: [[OUTER_LOOP_LOOPEXIT:.*]]:
18+
; CHECK-NEXT: br label %[[OUTER_LOOP]]
19+
; CHECK: [[OUTER_LOOP]]:
20+
; CHECK-NEXT: [[OUTER_PTR:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[OUTER_LOOP_LOOPEXIT]] ]
21+
; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
22+
; CHECK-NEXT: br i1 [[C]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
23+
; CHECK: [[INNER_LOOP_PREHEADER]]:
24+
; CHECK-NEXT: br label %[[INNER_LOOP:.*]]
25+
; CHECK: [[INNER_LOOP]]:
26+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 0, %[[INNER_LOOP_PREHEADER]] ]
27+
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[OUTER_PTR]], align 1
28+
; CHECK-NEXT: store i8 [[L]], ptr [[DST]], align 2
29+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
30+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
31+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[INNER_LOOP]], label %[[OUTER_LOOP_LOOPEXIT]]
32+
; CHECK: [[EXIT]]:
33+
; CHECK-NEXT: ret void
34+
;
35+
entry:
36+
br label %outer.loop
37+
38+
outer.loop:
39+
%outer.ptr = phi ptr [ %src, %entry ], [ %ptr.iv.next, %inner.loop ]
40+
%c = call i1 @cond()
41+
br i1 %c, label %inner.loop, label %exit
42+
43+
inner.loop:
44+
%iv = phi i32 [ 0, %outer.loop ], [ %iv.next, %inner.loop ]
45+
%ptr.iv = phi ptr [ %src, %outer.loop ], [ %ptr.iv.next, %inner.loop ]
46+
%l = load i8, ptr %outer.ptr, align 1
47+
%ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 %offset
48+
store i8 %l, ptr %dst, align 2
49+
%iv.next = add i32 %iv, 1
50+
%ec = icmp slt i32 %iv.next, %n
51+
br i1 %ec, label %inner.loop, label %outer.loop
52+
53+
exit:
54+
ret void
55+
}

llvm/test/Transforms/IndVarSimplify/zext-nuw.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@ define void @_Z3fn1v() {
1515
; CHECK-NEXT: [[J_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[X5]], align 1
1616
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
1717
; CHECK: .preheader4.lr.ph:
18-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
19-
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
20-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
2118
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
22-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[X4]] to i64
20+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
2321
; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]]
2422
; CHECK: .preheader4:
2523
; CHECK-NEXT: [[K_09:%.*]] = phi ptr [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ]

llvm/test/Transforms/LoopIdiom/reuse-lcssa-phi-scev-expansion.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,4 +161,3 @@ loop.0.latch:
161161
exit:
162162
ret ptr %res
163163
}
164-
>>>>>>> f9f68af4b8d5 ([SCEV] Make sure LCSSA is preserved when re-using phi if needed.)

0 commit comments

Comments
 (0)