Skip to content

Commit c37ee78

Browse files
committed
[SCEV] Try to push the op into ZExt: A + zext (-A + B) -> zext (B)
Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext (B), if trunc (A) + -A + B does not unsigned-wrap. The actual code supports a zero-extended Add with an arbitrary number of operands, hence the getAddExpr in the return. This helps SCEV reasoning in some cases, commonly when adding an offset to a zero-extended SCEV that subtracts the same offset. Note that this fold is restricted to cases where we can fold away an operand of the inner Add. The restriction is needed to avoid bad interactions with the patterns used when forming ZExts, which try to push the ZExt into the operands of an Add. https://alive2.llvm.org/ce/z/uuYGC3k
1 parent c6f7fa7 commit c37ee78

File tree

3 files changed

+22
-9
lines changed

3 files changed

+22
-9
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2682,6 +2682,21 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
26822682
return getAddExpr(NewOps, PreservedFlags);
26832683
}
26842684
}
2685+
2686+
// Try to push the constant operand into a ZExt: A + zext (-A + B) -> zext
2687+
// (B), if trunc (A) + -A + B does not unsigned-wrap.
2688+
if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(Ops[1])) {
2689+
const SCEV *B = ZExt->getOperand(0);
2690+
const SCEV *NarrowA = getTruncateExpr(A, B->getType());
2691+
if (isa<SCEVAddExpr>(B) &&
2692+
NarrowA == getNegativeSCEV(cast<SCEVAddExpr>(B)->getOperand(0)) &&
2693+
getZeroExtendExpr(NarrowA, ZExt->getType()) == A &&
2694+
hasFlags(
2695+
StrengthenNoWrapFlags(this, scAddExpr, {NarrowA, B}, OrigFlags),
2696+
SCEV::FlagNUW)) {
2697+
return getZeroExtendExpr(getAddExpr(NarrowA, B), ZExt->getType());
2698+
}
2699+
}
26852700
}
26862701

26872702
// Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)

llvm/test/Transforms/IndVarSimplify/AArch64/fold-ext-add.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@ define void @pred_mip_12(ptr %dst, ptr %src, i32 %n, i64 %offset) {
1010
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]], i64 [[OFFSET:%.*]]) {
1111
; CHECK-NEXT: [[ENTRY:.*]]:
1212
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1)
13+
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[SMAX]] to i64
14+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFFSET]], [[TMP0]]
15+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP1]]
1316
; CHECK-NEXT: br label %[[OUTER_LOOP:.*]]
1417
; CHECK: [[OUTER_LOOP_LOOPEXIT:.*]]:
15-
; CHECK-NEXT: [[PTR_IV_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[INNER_LOOP:.*]] ]
1618
; CHECK-NEXT: br label %[[OUTER_LOOP]]
1719
; CHECK: [[OUTER_LOOP]]:
18-
; CHECK-NEXT: [[OUTER_PTR:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[PTR_IV_NEXT_LCSSA]], %[[OUTER_LOOP_LOOPEXIT]] ]
20+
; CHECK-NEXT: [[OUTER_PTR:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[SCEVGEP]], %[[OUTER_LOOP_LOOPEXIT]] ]
1921
; CHECK-NEXT: [[C:%.*]] = call i1 @cond()
2022
; CHECK-NEXT: br i1 [[C]], label %[[INNER_LOOP_PREHEADER:.*]], label %[[EXIT:.*]]
2123
; CHECK: [[INNER_LOOP_PREHEADER]]:
22-
; CHECK-NEXT: br label %[[INNER_LOOP]]
24+
; CHECK-NEXT: br label %[[INNER_LOOP:.*]]
2325
; CHECK: [[INNER_LOOP]]:
2426
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[INNER_LOOP]] ], [ 0, %[[INNER_LOOP_PREHEADER]] ]
25-
; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT]], %[[INNER_LOOP]] ], [ [[SRC]], %[[INNER_LOOP_PREHEADER]] ]
2627
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[OUTER_PTR]], align 1
27-
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 [[OFFSET]]
2828
; CHECK-NEXT: store i8 [[L]], ptr [[DST]], align 2
2929
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
3030
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]

llvm/test/Transforms/IndVarSimplify/zext-nuw.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@ define void @_Z3fn1v() {
1515
; CHECK-NEXT: [[J_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[X5]], align 1
1616
; CHECK-NEXT: br label [[DOTPREHEADER4_LR_PH:%.*]]
1717
; CHECK: .preheader4.lr.ph:
18-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X4]], -1
19-
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[TMP1]] to i64
20-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
2118
; CHECK-NEXT: [[TMP4:%.*]] = sext i8 [[J_SROA_0_0_COPYLOAD]] to i64
22-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP3]], [[TMP4]]
19+
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[X4]] to i64
20+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
2321
; CHECK-NEXT: br label [[DOTPREHEADER4:%.*]]
2422
; CHECK: .preheader4:
2523
; CHECK-NEXT: [[K_09:%.*]] = phi ptr [ undef, [[DOTPREHEADER4_LR_PH]] ], [ [[X25:%.*]], [[X22:%.*]] ]

0 commit comments

Comments
 (0)