diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll index 279d4e82ccb85..83623fd82bb4a 100644 --- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll +++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK-LABEL: @foo( ;CHECK: icmp eq <4 x i32> ;CHECK: select <4 x i1> -;CHECK: ret i32 -define i32 @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { +;CHECK: ret void +define void @foo(i32 %x, i32 %t, ptr nocapture %A) nounwind uwtable ssp { entry: %cmp10 = icmp sgt i32 %x, 0 br i1 %cmp10, label %for.body, label %for.end @@ -35,5 +35,5 @@ if.end: ; preds = %for.body, %if.then br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %if.end, %entry - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll index 596e42e9f094d..d0c11946c9deb 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/scalar-block-cost.ll @@ -36,7 +36,7 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond.not, label %exit, label %for.body } -define i32 @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { +define void @if_convert(ptr %a, ptr %b, i32 %start, i32 %end) #0 { ; CHECK-COST-2: LV: Found an estimated cost of 0 for VF 1 For instruction: %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] ; CHECK-COST-2-NEXT: LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.032 @@ -70,7 +70,7 @@ for.cond.cleanup.loopexit: ; preds = %if.end br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry - ret i32 undef + ret void for.body: ; preds = %for.body.preheader, %if.end %i.032 = phi i32 [ %inc, %if.end ], [ %start, %for.body.preheader ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll index 9e205863b8367..44fb8cb2f3beb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll @@ -6,8 +6,8 @@ target triple = "x86_64-apple-macosx10.8.0" ; CHECK-LABEL: @read_mod_write_single_ptr( ; CHECK: load <8 x float> -; CHECK: ret i32 -define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -23,15 +23,15 @@ define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } ; CHECK-LABEL: @read_mod_i64( ; SLOWMEM32: load <2 x i64> ; FASTMEM32: load <4 x i64> -; CHECK: ret i32 -define i32 @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -47,6 +47,6 @@ define i32 @read_mod_i64(ptr nocapture %a, i32 %n) nounwind uwtable ssp { br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 6d2cda48f90ca..0287645d9d7f9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { +define void @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { ; CHECK-LABEL: @conversion_cost1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 3 ; CHECK-NEXT: br i1 [[TMP1]], label [[ITER_CHECK:%.*]], label [[DOT_CRIT_EDGE:%.*]] @@ -37,7 +37,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -58,7 +58,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i8> [[VEC_IND8]], splat (i8 4) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -73,11 +73,11 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; %1 = icmp sgt i32 %n, 3 br i1 %1, label %.lr.ph, label %._crit_edge @@ -93,10 +93,10 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } -define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { +define void @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwind uwtable ssp { ; CHECK-LABEL: @conversion_cost2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 9 ; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] @@ -136,7 +136,7 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -152,11 +152,11 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: ._crit_edge.loopexit: ; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] ; CHECK: ._crit_edge: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; %1 = icmp sgt i32 %n, 9 br i1 %1, label %.lr.ph, label %._crit_edge @@ -173,5 +173,5 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll index af5c921c29149..fa3b4a6609d4c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll @@ -13,7 +13,7 @@ target triple = "x86_64-unknown-linux" ;CHECK-LABEL: func1x6( ;CHECK: <4 x i32> ;CHECK: ret -define i32 @func1x6(ptr nocapture %out, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { +define void @func1x6(ptr nocapture %out, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { entry: br label %for.body @@ -40,14 +40,14 @@ for.body: ; preds = %for.body, %entry br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - ret i32 undef + ret void } ; We are vectorizing with 12 runtime checks. ;CHECK-LABEL: func2x6( ;CHECK: <4 x i32> ;CHECK: ret -define i32 @func2x6(ptr nocapture %out, ptr nocapture %out2, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { +define void @func2x6(ptr nocapture %out, ptr nocapture %out2, ptr nocapture %A, ptr nocapture %B, ptr nocapture %C, ptr nocapture %D, ptr nocapture %E, ptr nocapture %F) { entry: br label %for.body @@ -85,5 +85,5 @@ for.body: ; preds = %for.body, %entry br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll index 8971dfe507240..47355e7d9dafd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll @@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0" ;CHECK-NOUNRL: store <4 x i32> ;CHECK-NOUNRL-NOT: store <4 x i32> ;CHECK-NOUNRL: ret -define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { +define void @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -27,5 +27,5 @@ define i32 @bar(ptr nocapture %A, i32 %n) nounwind uwtable ssp { br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll index f64255f29d335..b7aa958958734 100644 --- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll +++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; When scalarizing stores we need to preserve the original order. ; Make sure that we are extracting in the correct order (0101, and not 0011). -define i32 @foo(ptr nocapture %A) { +define void @foo(ptr nocapture %A) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] @@ -39,7 +39,7 @@ define i32 @foo(ptr nocapture %A) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: br label %for.body @@ -55,7 +55,7 @@ for.body: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index 1588d02eff3db..51255b2e35707 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -3,7 +3,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { +define void @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -73,7 +73,7 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp26 = icmp sgt i32 %n, 0 @@ -106,11 +106,11 @@ if.end14: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } ; As above but with multiple variables set per block. -define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { +define void @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-LABEL: @multi_variable_if_nest( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 @@ -188,7 +188,7 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp26 = icmp sgt i32 %n, 0 @@ -224,5 +224,5 @@ if.end14: br i1 %exitcond, label %for.end, label %for.body for.end: - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll index 8a7f4a386fda1..a88a9b1466149 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll @@ -17,8 +17,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; } ;} -define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp { -; CHECK-LABEL: define i32 @function0( +define void @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp { +; CHECK-LABEL: define void @function0( ; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]], i32 [[START:%.*]], i32 [[END:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[CMP16:%.*]] = icmp slt i32 [[START]], [[END]] @@ -94,7 +94,7 @@ define i32 @function0(ptr nocapture %a, ptr nocapture %b, i32 %start, i32 %end) ; CHECK: [[FOR_END_LOOPEXIT]]: ; CHECK-NEXT: br label %[[FOR_END]] ; CHECK: [[FOR_END]]: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp16 = icmp slt i32 %start, %end @@ -127,7 +127,7 @@ if.end: br i1 %cmp, label %for.body, label %for.end for.end: - ret i32 undef + ret void } @@ -237,6 +237,8 @@ for.end: ; preds = %for.inc, %entry ; Handle PHI with single incoming value having a full mask. ; PR34523 +; NOTE: Changing PHI inputs from undef to poison leads to change in +; behaviour of the test. Left as undef for now. define void @PR34523() { ; CHECK-LABEL: define void @PR34523() { ; CHECK-NEXT: [[BB1:.*:]] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index 742ee649db3a2..eea22374ade30 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -337,7 +337,7 @@ for.end: ; preds = %for.body ; } ; } -define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { +define void @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { ; CHECK-LABEL: @multiple_uniform_stores( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0 @@ -429,7 +429,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly ; CHECK: for.end10.loopexit: ; CHECK-NEXT: br label [[FOR_END10]] ; CHECK: for.end10: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp20 = icmp eq i32 %itr, 0 @@ -469,12 +469,12 @@ for.inc8: ; preds = %for.body3, %for.con br i1 %exitcond26, label %for.end10, label %for.cond1.preheader for.end10: ; preds = %for.inc8, %entry - ret i32 undef + ret void } ; second uniform store to the same address is conditional. ; we do not vectorize this. -define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { +define void @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocapture readonly %var2, i32 %itr) #0 { ; CHECK-LABEL: @multiple_uniform_stores_conditional( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP20:%.*]] = icmp eq i32 [[ITR:%.*]], 0 @@ -520,7 +520,7 @@ define i32 @multiple_uniform_stores_conditional(ptr nocapture %var1, ptr nocaptu ; CHECK: for.end10.loopexit: ; CHECK-NEXT: br label [[FOR_END10]] ; CHECK: for.end10: -; CHECK-NEXT: ret i32 undef +; CHECK-NEXT: ret void ; entry: %cmp20 = icmp eq i32 %itr, 0 @@ -567,7 +567,7 @@ for.inc8: ; preds = %for.body3, %for.con br i1 %exitcond26, label %for.end10, label %for.cond1.preheader for.end10: ; preds = %for.inc8, %entry - ret i32 undef + ret void } ; cannot vectorize loop with unsafe dependency between uniform load (%i10) and store diff --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll index b891b4312f18d..d9d9eec9b7d33 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep.ll @@ -132,7 +132,7 @@ for.end: ; CHECK-LABEL: @f6 ; CHECK-NOT: <2 x i32> -define i32 @f6(ptr %a, i32 %tmp) { +define void @f6(ptr %a, i32 %tmp) { entry: br label %for.body @@ -149,7 +149,7 @@ for.body: br i1 %exitcond, label %for.body, label %for.end for.end: - ret i32 undef + ret void } ; Don't vectorize true loop carried dependencies that are not a multiple of the diff --git a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll index d700d484c56bc..f5e480c910c33 100644 --- a/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll +++ b/llvm/test/Transforms/LoopVectorize/partial-lcssa.ll @@ -10,7 +10,7 @@ ; CHECK: store i64 %indvars.outer, ptr %O2, align 4 -define i64 @foo(ptr nocapture %A, ptr nocapture %B, i64 %n, i64 %m, ptr %O1, ptr %O2) { +define void @foo(ptr nocapture %A, ptr nocapture %B, i64 %n, i64 %m, ptr %O1, ptr %O2) { entry: %cmp = icmp sgt i64 %n, 0 br i1 %cmp, label %for.body.outer.preheader, label %for.end.outer @@ -50,5 +50,5 @@ for.end.outer.loopexit: ; preds = %for.end.inner br label %for.end.outer for.end.outer: ; preds = %for.end.outer.loopexit, %entry - ret i64 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/pr28541.ll b/llvm/test/Transforms/LoopVectorize/pr28541.ll index ad7f6e7b16b16..0a9c8c1504055 100644 --- a/llvm/test/Transforms/LoopVectorize/pr28541.ll +++ b/llvm/test/Transforms/LoopVectorize/pr28541.ll @@ -28,7 +28,7 @@ ; CHECK-NOT: vectorized loop ; CHECK-LABEL: fn1 -define i32 @fn1() { +define void @fn1() { entry: %tmp2 = load i32, ptr @b, align 4 %dec3 = add nsw i32 %tmp2, -1 @@ -67,5 +67,5 @@ while.cond.while.end_crit_edge: ; preds = %while.cond br label %while.end while.end: ; preds = %while.cond.while.end_crit_edge, %entry - ret i32 undef + ret void } diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index f87be5a115044..6ea227f7492ee 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -10,7 +10,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; a[i] = b[i] * 3; ; } -define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp { +define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B2:%.*]] = ptrtoint ptr [[B:%.*]] to i64, !dbg [[DBG4:![0-9]+]] @@ -58,7 +58,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG14:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: ret i32 undef, !dbg [[DBG14]] +; CHECK-NEXT: ret void, !dbg [[DBG14]] ; ; FORCED_OPTSIZE-LABEL: @foo( ; FORCED_OPTSIZE-NEXT: entry: @@ -80,7 +80,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp ; FORCED_OPTSIZE: for.end.loopexit: ; FORCED_OPTSIZE-NEXT: br label [[FOR_END]], !dbg [[DBG10:![0-9]+]] ; FORCED_OPTSIZE: for.end: -; FORCED_OPTSIZE-NEXT: ret i32 undef, !dbg [[DBG10]] +; FORCED_OPTSIZE-NEXT: ret void, !dbg [[DBG10]] ; entry: %cmp6 = icmp sgt i32 %n, 0, !dbg !6 @@ -99,7 +99,7 @@ for.body: ; preds = %entry, %for.body br i1 %exitcond, label %for.end, label %for.body, !dbg !7 for.end: ; preds = %for.body, %entry - ret i32 undef, !dbg !8 + ret void, !dbg !8 } ; Make sure that we try to vectorize loops with a runtime check if the @@ -505,11 +505,11 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META37:![0-9]+]], !noalias [[META40:![0-9]+]] -; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META40]] +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META39]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -524,7 +524,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: store i32 0, ptr [[IN]], align 4 ; CHECK-NEXT: [[CMP7_NOT:%.*]] = icmp sgt i32 [[LEN]], [[IV_NEXT]] -; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP7_NOT]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP42:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/write-only.ll b/llvm/test/Transforms/LoopVectorize/write-only.ll index cc21b94bc3070..8df71e8394874 100644 --- a/llvm/test/Transforms/LoopVectorize/write-only.ll +++ b/llvm/test/Transforms/LoopVectorize/write-only.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK-LABEL: @read_mod_write_single_ptr( ;CHECK: load <4 x float> -;CHECK: ret i32 -define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +;CHECK: ret void +define void @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -21,14 +21,14 @@ define i32 @read_mod_write_single_ptr(ptr nocapture %a, i32 %n) nounwind uwtable br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void } ; Ensure that volatile stores are not vectorized. ; CHECK-LABEL: @read_mod_write_single_ptr_volatile_store( ; CHECK-NOT: store <4 x float> -; CHECK: ret i32 -define i32 @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) nounwind uwtable ssp { +; CHECK: ret void +define void @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) nounwind uwtable ssp { %1 = icmp sgt i32 %n, 0 br i1 %1, label %.lr.ph, label %._crit_edge @@ -44,5 +44,5 @@ define i32 @read_mod_write_single_ptr_volatile_store(ptr nocapture %a, i32 %n) n br i1 %exitcond, label %._crit_edge, label %.lr.ph ._crit_edge: ; preds = %.lr.ph, %0 - ret i32 undef + ret void }