Skip to content

Commit e8abadc

Browse files
fhahn authored and kcloudy0717 committed
[SCEV] Handle non-constant start values in AddRec UDiv canonicalization. (llvm#170474)
Follow-up to llvm#169576 to enable UDiv canonicalization if the start of the AddRec is not constant. The fold is not restricted to constant start values, as long as we are able to compute a constant remainder. The fold is only applied if the subtraction of the remainder can be folded into the start expression, but that is just to avoid creating more complex AddRecs. For reference, the proof from llvm#169576 is https://alive2.llvm.org/ce/z/iu2tav PR: llvm#170474
1 parent 53055cf commit e8abadc

File tree

3 files changed

+53
-17
lines changed

3 files changed

+53
-17
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3490,11 +3490,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
34903490
}
34913491
/// Get a canonical UDivExpr for a recurrence.
34923492
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
3493-
// We can currently only fold X%N if X is constant.
3494-
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
3495-
if (StartC && !DivInt.urem(StepInt)) {
3496-
const APInt &StartInt = StartC->getAPInt();
3497-
const APInt &StartRem = StartInt.urem(StepInt);
3493+
const APInt *StartRem;
3494+
if (!DivInt.urem(StepInt) && match(getURemExpr(AR->getStart(), Step),
3495+
m_scev_APInt(StartRem))) {
34983496
bool NoWrap =
34993497
getZeroExtendExpr(AR, ExtTy) ==
35003498
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
@@ -3507,10 +3505,15 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
35073505
// all offsets in [(X - X%N), X).
35083506
bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C
35093507
StepInt.isPowerOf2() && DivInt.isPowerOf2();
3510-
if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
3511-
const SCEV *NewLHS = getAddRecExpr(
3512-
getConstant(StartInt - StartRem), Step, AR->getLoop(),
3513-
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
3508+
// Only fold if the subtraction can be folded in the start
3509+
// expression.
3510+
const SCEV *NewStart =
3511+
getMinusSCEV(AR->getStart(), getConstant(*StartRem));
3512+
if (*StartRem != 0 && (NoWrap || CanFoldWithWrap) &&
3513+
!isa<SCEVAddExpr>(NewStart)) {
3514+
const SCEV *NewLHS =
3515+
getAddRecExpr(NewStart, Step, AR->getLoop(),
3516+
NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
35143517
if (LHS != NewLHS) {
35153518
LHS = NewLHS;
35163519

llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -180,27 +180,27 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
180180
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
181181
; CHECK-NEXT: --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
182182
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
183-
; CHECK-NEXT: --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
183+
; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
184184
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
185185
; CHECK-NEXT: --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
186186
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
187187
; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
188188
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
189189
; CHECK-NEXT: --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
190190
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
191-
; CHECK-NEXT: --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
191+
; CHECK-NEXT: --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
192192
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
193193
; CHECK-NEXT: --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
194194
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
195195
; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
196196
; CHECK-NEXT: %iv.5 = add i64 %iv, 5
197197
; CHECK-NEXT: --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
198198
; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4
199-
; CHECK-NEXT: --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
199+
; CHECK-NEXT: --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
200200
; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1
201201
; CHECK-NEXT: --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
202202
; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4
203-
; CHECK-NEXT: --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
203+
; CHECK-NEXT: --> ({{\{\{}}-2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
204204
; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3
205205
; CHECK-NEXT: --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
206206
; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3
@@ -296,27 +296,27 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
296296
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
297297
; CHECK-NEXT: --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
298298
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
299-
; CHECK-NEXT: --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
299+
; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
300300
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
301301
; CHECK-NEXT: --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
302302
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
303303
; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
304304
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
305305
; CHECK-NEXT: --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
306306
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
307-
; CHECK-NEXT: --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
307+
; CHECK-NEXT: --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
308308
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
309309
; CHECK-NEXT: --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
310310
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
311311
; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
312312
; CHECK-NEXT: %iv.5 = add i64 %iv, 5
313313
; CHECK-NEXT: --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
314314
; CHECK-NEXT: %div.5 = udiv i64 %iv.5, 4
315-
; CHECK-NEXT: --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
315+
; CHECK-NEXT: --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
316316
; CHECK-NEXT: %iv.neg.1 = add i64 %iv, -1
317317
; CHECK-NEXT: --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
318318
; CHECK-NEXT: %div.neg.1 = udiv i64 %iv.neg.1, 4
319-
; CHECK-NEXT: --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
319+
; CHECK-NEXT: --> ({{\{\{}}-2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
320320
; CHECK-NEXT: %div3.0 = udiv i64 %iv, 3
321321
; CHECK-NEXT: --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
322322
; CHECK-NEXT: %div3.1 = udiv i64 %iv.1, 3

llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,37 @@ exit:
4848
ret void
4949
}
5050

51+
define i64 @sdiv_arg_outer_iv(ptr noalias %dst, ptr %src) {
52+
; CHECK: 'sdiv_arg_outer_iv'
53+
; CHECK: Cost of 0 for VF 2: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
54+
; CHECK: Cost of 0 for VF 4: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
55+
; CHECK: Cost of 0 for VF 8: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
56+
; CHECK: Cost of 0 for VF 16: REPLICATE ir<%div> = sdiv ir<%add.offset>, ir<8>
57+
entry:
58+
br label %outer.header
59+
60+
outer.header:
61+
%outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
62+
%offset = shl nsw i32 %outer.iv, 7
63+
br label %loop
64+
65+
loop:
66+
%iv = phi i64 [ 0, %outer.header ], [ %iv.next, %loop ]
67+
%iv.trunc = trunc i64 %iv to i32
68+
%add.offset = add i32 %offset, %iv.trunc
69+
%div = sdiv i32 %add.offset, 8
70+
%div.ext = sext i32 %div to i64
71+
%gep.src = getelementptr i8, ptr %src, i64 %div.ext
72+
%l = load i8, ptr %gep.src, align 1
73+
%gep.dst = getelementptr i8, ptr %dst, i64 %iv
74+
store i8 %l, ptr %gep.dst, align 1
75+
%iv.next = add i64 %iv, 1
76+
%ec = icmp eq i64 %iv, 64
77+
br i1 %ec, label %outer.latch, label %loop
78+
79+
outer.latch:
80+
%outer.iv.next = add nsw i32 %outer.iv, 1
81+
br label %outer.header
82+
}
83+
5184
attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }

0 commit comments

Comments
 (0)