Skip to content

Commit 8693ef1

Browse files
committed
[SCEV] Add tests that benefit from rewriting SCEVAddExpr with guards.
Add additional tests benefiting from rewriting existing SCEVAddExprs with guards.
1 parent b529921 commit 8693ef1

File tree

4 files changed

+245
-0
lines changed

4 files changed

+245
-0
lines changed

llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info-apply-to-adds.ll

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,3 +27,32 @@ loop:
2727
exit:
2828
ret void
2929
}
30+
31+
; External function declared without a body or attributes; it is called from
; the loop body of @test_add_sub_1_guard.
declare void @clobber()
32+
33+
; Pointer-induction loop from %src up to %end, where
; %end = %src + zext((%n >> 1) - 1). The loop is only entered when the guard
; %shr == 1 holds, in which case %sub.1 evaluates to 0 and %end equals %src.
; The expectations below record the backedge-taken count symbolically in terms
; of %n, with a constant max of 4294967295.
; NOTE(review): presumably a baseline for an analysis change that applies the
; guard to the add expression - confirm.
define void @test_add_sub_1_guard(ptr %src, i32 %n) {
34+
; CHECK-LABEL: 'test_add_sub_1_guard'
35+
; CHECK-NEXT: Determining loop execution counts for: @test_add_sub_1_guard
36+
; CHECK-NEXT: Loop %loop: backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
37+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
38+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (zext i32 (-1 + (%n /u 2))<nsw> to i64)
39+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
40+
;
41+
entry:
42+
%shr = lshr i32 %n, 1
43+
%sub.1 = add i32 %shr, -1
44+
%sub.ext = zext i32 %sub.1 to i64
45+
%pre = icmp eq i32 %shr, 1
46+
%end = getelementptr i8, ptr %src, i64 %sub.ext
47+
br i1 %pre, label %loop, label %exit
48+
49+
loop:
50+
%iv = phi ptr [ %src, %entry ], [ %iv.next, %loop ]
51+
call void @clobber()
52+
%iv.next = getelementptr i8, ptr %iv, i64 1
53+
%ec = icmp eq ptr %iv, %end
54+
br i1 %ec, label %exit, label %loop
55+
56+
exit:
57+
ret void
58+
}

llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll

Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -334,4 +334,88 @@ out_of_bounds:
334334
ret i32 -1
335335
}
336336

337+
; Stores a zero byte to %dst[%iv] for %iv starting at 0 and continues while
; %iv.next <s %shr, where %shr = (trunc(%n) + 1) >> 1 (logical shift).
; The entry guard skips the loop when %add.1 <u 8, so whenever the loop runs
; %shr is at least 4 and, being the result of a logical shift right by one,
; has a clear sign bit.
; NOTE(review): the function name says no smax is needed, yet the expectations
; below still contain @llvm.smax.i32(%shr, 1) in the preheader; presumably a
; baseline recorded ahead of an analysis improvement - confirm.
define void @slt_no_smax_needed(i64 %n, ptr %dst) {
338+
; CHECK-LABEL: @slt_no_smax_needed(
339+
; CHECK-NEXT: entry:
340+
; CHECK-NEXT: [[N_TRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
341+
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[N_TRUNC]], 1
342+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD_1]], 1
343+
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
344+
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
345+
; CHECK: loop.preheader:
346+
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[SHR]], i32 1)
347+
; CHECK-NEXT: br label [[LOOP:%.*]]
348+
; CHECK: loop:
349+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
350+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
351+
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
352+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
353+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[SMAX]]
354+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
355+
; CHECK: exit.loopexit:
356+
; CHECK-NEXT: br label [[EXIT]]
357+
; CHECK: exit:
358+
; CHECK-NEXT: ret void
359+
;
360+
entry:
361+
%n.trunc = trunc i64 %n to i32
362+
%add.1 = add i32 %n.trunc, 1
363+
%shr = lshr i32 %add.1, 1
364+
%pre = icmp ult i32 %add.1, 8
365+
br i1 %pre, label %exit, label %loop
366+
367+
loop:
368+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
369+
%gep = getelementptr inbounds i8, ptr %dst, i32 %iv
370+
store i8 0, ptr %gep, align 1
371+
%iv.next = add i32 %iv, 1
372+
%ec = icmp slt i32 %iv.next, %shr
373+
br i1 %ec, label %loop, label %exit
374+
375+
exit:
376+
ret void
377+
}
378+
379+
; Unsigned counterpart of @slt_no_smax_needed: stores a zero byte to %dst[%iv]
; for %iv starting at 0 and continues while %iv.next <u %shr, where
; %shr = (trunc(%n) + 1) >> 1 (logical shift).
; The entry guard skips the loop when %add.1 <u 8, so whenever the loop runs
; %shr is at least 4.
; NOTE(review): the function name says no umax is needed, yet the expectations
; below still contain @llvm.umax.i32(%shr, 1) in the preheader; presumably a
; baseline recorded ahead of an analysis improvement - confirm.
define void @ult_no_umax_needed(i64 %n, ptr %dst) {
380+
; CHECK-LABEL: @ult_no_umax_needed(
381+
; CHECK-NEXT: entry:
382+
; CHECK-NEXT: [[N_TRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
383+
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[N_TRUNC]], 1
384+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD_1]], 1
385+
; CHECK-NEXT: [[PRE:%.*]] = icmp ult i32 [[ADD_1]], 8
386+
; CHECK-NEXT: br i1 [[PRE]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
387+
; CHECK: loop.preheader:
388+
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHR]], i32 1)
389+
; CHECK-NEXT: br label [[LOOP:%.*]]
390+
; CHECK: loop:
391+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
392+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i32 [[IV]]
393+
; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
394+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
395+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], [[UMAX]]
396+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
397+
; CHECK: exit.loopexit:
398+
; CHECK-NEXT: br label [[EXIT]]
399+
; CHECK: exit:
400+
; CHECK-NEXT: ret void
401+
;
402+
entry:
403+
%n.trunc = trunc i64 %n to i32
404+
%add.1 = add i32 %n.trunc, 1
405+
%shr = lshr i32 %add.1, 1
406+
%pre = icmp ult i32 %add.1, 8
407+
br i1 %pre, label %exit, label %loop
408+
409+
loop:
410+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
411+
%gep = getelementptr inbounds i8, ptr %dst, i32 %iv
412+
store i8 0, ptr %gep, align 1
413+
%iv.next = add i32 %iv, 1
414+
%ec = icmp ult i32 %iv.next, %shr
415+
br i1 %ec, label %loop, label %exit
416+
417+
exit:
418+
ret void
419+
}
420+
337421
!0 = !{i32 1, i32 2147483648}

llvm/test/Transforms/LoopUnroll/peel-last-iteration-with-guards.ll

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -201,11 +201,84 @@ loop.latch:
201201
%ec = icmp eq i32 %iv.next, %n
202202
br i1 %ec, label %exit, label %loop.header
203203

204+
exit:
205+
ret void
206+
}
207+
208+
; Loop counting %iv upward from 1 until %iv.next == %n. The entry guard
; branches straight to %exit when %n - 1 == 0, so the loop only runs otherwise.
; Inside the loop the volatile load in %then is skipped exactly when
; %iv == %n - 1.
; The expectations below show one iteration split out after the main loop
; (blocks suffixed _PEEL), with the %cmp115 branch in the remaining loop folded
; to `br i1 false` and the loop tagged with llvm.loop metadata.
define void @test_peel_guard_sub_1_btc(i32 %n) {
209+
; CHECK-LABEL: define void @test_peel_guard_sub_1_btc(
210+
; CHECK-SAME: i32 [[N:%.*]]) {
211+
; CHECK-NEXT: [[ENTRY:.*:]]
212+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[N]], -1
213+
; CHECK-NEXT: [[PRE:%.*]] = icmp eq i32 [[SUB]], 0
214+
; CHECK-NEXT: br i1 [[PRE]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
215+
; CHECK: [[LOOP_HEADER_PREHEADER]]:
216+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -2
217+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
218+
; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_HEADER_PREHEADER_SPLIT:.*]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN:.*]]
219+
; CHECK: [[LOOP_HEADER_PREHEADER_SPLIT]]:
220+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
221+
; CHECK: [[LOOP_HEADER]]:
222+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 1, %[[LOOP_HEADER_PREHEADER_SPLIT]] ]
223+
; CHECK-NEXT: br i1 false, label %[[LOOP_LATCH]], label %[[THEN:.*]]
224+
; CHECK: [[THEN]]:
225+
; CHECK-NEXT: [[CALL136:%.*]] = load volatile ptr, ptr null, align 4294967296
226+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
227+
; CHECK: [[LOOP_LATCH]]:
228+
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
229+
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], 1
230+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP2]]
231+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
232+
; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]]:
233+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP_LATCH]] ]
234+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_PEEL_BEGIN]]
235+
; CHECK: [[EXIT_LOOPEXIT_PEEL_BEGIN]]:
236+
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 1, %[[LOOP_HEADER_PREHEADER]] ], [ [[DOTPH]], %[[EXIT_LOOPEXIT_PEEL_BEGIN_LOOPEXIT]] ]
237+
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL:.*]]
238+
; CHECK: [[LOOP_HEADER_PEEL]]:
239+
; CHECK-NEXT: [[CMP115_PEEL:%.*]] = icmp eq i32 [[TMP3]], [[SUB]]
240+
; CHECK-NEXT: br i1 [[CMP115_PEEL]], label %[[LOOP_LATCH_PEEL:.*]], label %[[THEN_PEEL:.*]]
241+
; CHECK: [[THEN_PEEL]]:
242+
; CHECK-NEXT: [[CALL136_PEEL:%.*]] = load volatile ptr, ptr null, align 4294967296
243+
; CHECK-NEXT: br label %[[LOOP_LATCH_PEEL]]
244+
; CHECK: [[LOOP_LATCH_PEEL]]:
245+
; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw i32 [[TMP3]], 1
246+
; CHECK-NEXT: [[EC_PEEL:%.*]] = icmp eq i32 [[IV_NEXT_PEEL]], [[N]]
247+
; CHECK-NEXT: br i1 [[EC_PEEL]], label %[[EXIT_LOOPEXIT_PEEL_NEXT:.*]], label %[[EXIT_LOOPEXIT_PEEL_NEXT]]
248+
; CHECK: [[EXIT_LOOPEXIT_PEEL_NEXT]]:
249+
; CHECK-NEXT: br label %[[LOOP_HEADER_PEEL_NEXT:.*]]
250+
; CHECK: [[LOOP_HEADER_PEEL_NEXT]]:
251+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
252+
; CHECK: [[EXIT_LOOPEXIT]]:
253+
; CHECK-NEXT: br label %[[EXIT]]
254+
; CHECK: [[EXIT]]:
255+
; CHECK-NEXT: ret void
256+
;
257+
entry:
258+
%sub = add i32 %n, -1
259+
%pre = icmp eq i32 %sub, 0
260+
br i1 %pre, label %exit, label %loop.header
261+
262+
loop.header: ; preds = %loop.latch, %entry
263+
%iv = phi i32 [ %iv.next, %loop.latch ], [ 1, %entry ]
264+
%cmp115 = icmp eq i32 %iv, %sub
265+
br i1 %cmp115, label %loop.latch, label %then
266+
267+
then:
268+
%call136 = load volatile ptr, ptr null, align 4294967296
269+
br label %loop.latch
270+
271+
loop.latch:
272+
%iv.next = add nuw i32 %iv, 1
273+
%ec = icmp eq i32 %iv.next, %n
274+
br i1 %ec, label %exit, label %loop.header
275+
204276
exit:
205277
ret void
206278
}
207279
;.
208280
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
209281
; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
210282
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
283+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
211284
;.

llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -434,3 +434,62 @@ loop:
434434
exit:
435435
ret void
436436
}
437+
438+
439+
; Copies i32 elements from %A[%iv] to %A[%iv + %offset], with
; %offset = %x - %y and %iv starting at 0. The loop exits once %iv >s %x
; (the comparison uses %iv, not %iv.next).
; The entry guard branches to %exit when %offset >= 0 (signed), so the loop
; only runs with a negative %offset.
; NOTE(review): the expectations below still contain the DIFF_CHECK compare in
; the vector.memcheck block; the function name suggests it is meant to become
; removable once the guard is taken into account - confirm.
define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) {
440+
; CHECK-LABEL: define void @remove_diff_checks_via_guards(
441+
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[A:%.*]]) {
442+
; CHECK-NEXT: [[ENTRY:.*:]]
443+
; CHECK-NEXT: [[OFFSET:%.*]] = sub i32 [[X]], [[Y]]
444+
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[OFFSET]], 0
445+
; CHECK-NEXT: br i1 [[CMP]], [[EXIT:label %.*]], label %[[LOOP_PREHEADER:.*]]
446+
; CHECK: [[LOOP_PREHEADER]]:
447+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[X]] to i64
448+
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[TMP0]], 1
449+
; CHECK-NEXT: [[SMAX2:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP1]], i64 0)
450+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[SMAX2]] to i32
451+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i32 [[TMP2]], 1
452+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
453+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_SCEVCHECK:.*]]
454+
; CHECK: [[VECTOR_SCEVCHECK]]:
455+
; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[X]] to i64
456+
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], 1
457+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP5]], i64 0)
458+
; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[SMAX]] to i32
459+
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 0
460+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
461+
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
462+
; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[SMAX]] to i32
463+
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET]], [[TMP10]]
464+
; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], [[OFFSET]]
465+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[SMAX]], 4294967295
466+
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
467+
; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP9]], [[TMP14]]
468+
; CHECK-NEXT: br i1 [[TMP15]], [[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
469+
; CHECK: [[VECTOR_MEMCHECK]]:
470+
; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64
471+
; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2
472+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16
473+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]]
474+
;
475+
entry:
476+
%offset = sub i32 %x, %y
477+
%cmp = icmp sge i32 %offset, 0
478+
br i1 %cmp, label %exit, label %loop
479+
480+
loop:
481+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
482+
%iv.ext = sext i32 %iv to i64
483+
%gep.A = getelementptr i32, ptr %A, i64 %iv.ext
484+
%l = load i32, ptr %gep.A, align 1
485+
%iv.offset = add i32 %iv, %offset
486+
%iv.offset.ext = sext i32 %iv.offset to i64
487+
%gep.A.offset = getelementptr i32, ptr %A, i64 %iv.offset.ext
488+
store i32 %l, ptr %gep.A.offset, align 1
489+
%iv.next = add i32 %iv, 1
490+
%ec = icmp sgt i32 %iv, %x
491+
br i1 %ec, label %exit, label %loop
492+
493+
exit:
494+
ret void
495+
}

0 commit comments

Comments
 (0)