From ccbf26564cc9e9cf91e50724c90c68e20abea0cc Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Wed, 16 Oct 2024 12:10:14 +0000 Subject: [PATCH] [NFC][LoopVectorize] Add more simple early exit tests I realised we are missing tests to cover more loops with multiple early exits - some countable and some uncountable. I've also added a few SVE versions of the test in the AArch64 directory, since I expect there to be some divergence from the same tests in the top level directory once we start vectorising them. --- .../AArch64/simple_early_exit.ll | 333 ++++++++++++++++++ .../LoopVectorize/simple_early_exit.ll | 188 +++++++++- 2 files changed, 518 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll new file mode 100644 index 0000000000000..82556bdd2a5ec --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -0,0 +1,333 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s --check-prefixes=CHECK + +target triple = "aarch64-unknown-linux-gnu" + +declare void @init_mem(ptr, i64); + +define i64 @same_exit_block_pre_inc_use1() #1 { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX2]] +; CHECK-NEXT: [[TMP38:%.*]] = load 
i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX2]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX2]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +; In this example the early exit block appears in the list of ExitNotTaken +; SCEVs, but is not computable. 
+define i64 @same_exit_block_pre_inc_use4() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i64] + %p2 = alloca [1024 x i64] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index + %ld1 = load i64, ptr %arrayidx, align 1 + %cmp3 = icmp ult i64 %index, %ld1 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_call() #1 { +; CHECK-LABEL: define i64 @loop_contains_safe_call( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x 
i8], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX2]] +; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index + %ld1 = load float, ptr %arrayidx, align 1 + %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1) + %cmp = fcmp fast ult float %sqrt, 3.0e+00 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_div() #1 { +; CHECK-LABEL: define i64 @loop_contains_safe_div( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], 
align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 %ld1, 20000 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) { +; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' +; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63 +; DEBUG-NEXT: LV: We can vectorize this loop! 
+; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit( +; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ [[LD2]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index + %ld2 = load i64, ptr %arrayidx2, align 8 + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ] + ret i64 %retval 
+} + + +; The form of the induction variables requires SCEV predicates. +define i32 @diff_exit_block_needs_scev_check(i32 %end) { +; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' +; DEBUG: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32))) +; DEBUG-NEXT: LV: We can vectorize this loop! +; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check( +; CHECK-SAME: i32 [[END:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: br label [[FOR_BODY1:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 +; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY1]], label [[EXIT:%.*]] +; CHECK: found: +; CHECK-NEXT: ret i32 1 +; CHECK: exit: +; 
CHECK-NEXT: ret i32 0 +; +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i32 %end, 1023 + br label %for.body + +for.body: + %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ] + %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind + %1 = load i32, ptr %arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i8 %ind, 1 + %conv = zext i8 %ind.next to i32 + %gep.ind.next = add i64 %gep.ind, 1 + %cmp = icmp ult i32 %conv, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + + +declare i32 @foo(i32) readonly +declare <vscale x 4 x i32> @foo_vec(<vscale x 4 x i32>) + +attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } +attributes #1 = { "target-features"="+sve" vscale_range(1,16) } diff --git a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll index 49454ae18db79..d5e4f4d016c6e 100644 --- a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll @@ -311,7 +311,6 @@ loop.end: } - define i64 @same_exit_block_post_inc_use() { ; CHECK-LABEL: define i64 @same_exit_block_post_inc_use() { ; CHECK-NEXT: entry: @@ -860,8 +859,8 @@ loop.end: ; There are multiple exit blocks - two of them have an exact representation for the ; exit-not-taken counts and the other is unknown, i.e. the "early exit". 
-define i64 @multiple_exits_one_early() { -; CHECK-LABEL: define i64 @multiple_exits_one_early() { +define i64 @multiple_exiting_one_early_same_exit() { +; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 ; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 @@ -918,6 +917,189 @@ loop.end: } +define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() { +; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void 
@init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @multiple_exiting_one_early_diff_exit() { +; CHECK-LABEL: define i64 @multiple_exiting_one_early_diff_exit() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; 
CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end.early: +; CHECK-NEXT: [[RET_EARLY:%.*]] = phi i64 [ [[INDEX]], [[SEARCH]] ] +; CHECK-NEXT: ret i64 [[RET_EARLY]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ 128, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end.early, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end.early: + %ret.early = phi i64 [ %index, %search ] + ret i64 %ret.early + +loop.end: + %retval = phi i64 [ 64, %loop ], [ 128, %loop.inc ] + ret i64 %retval +} + +define i64 @multiple_exiting_one_early_diff_exit_no_phis() { +; CHECK-LABEL: define i64 @multiple_exiting_one_early_diff_exit_no_phis() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end.early: +; CHECK-NEXT: ret i64 1 +; CHECK: loop.end: +; CHECK-NEXT: ret i64 0 +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end.early, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end.early: + ret i64 1 + +loop.end: + ret i64 0 +} + + ; We don't currently support multiple early exits. define i64 @multiple_early_exits() { ; DEBUG-LABEL: LV: Checking a loop in 'multiple_early_exits'