diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 6f00d5034fbd5..907bb7875dc80 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1511,12 +1511,13 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, if (isNoWrapAddRec(Ptr, AR, PSE, Lp)) return Stride; - // An nusw getelementptr that is a AddRec with a unit stride - // cannot wrap per definition. If it did, the result would be poison - // and any memory access dependent on it would be immediate UB - // when executed. + // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap, + // the distance between the previously accessed location and the wrapped + // location would be larger than half the pointer index type space. In that + // case, the GEP would be poison and any memory access dependent on it would + // be immediate UB when executed. if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); - GEP && GEP->hasNoUnsignedSignedWrap() && (Stride == 1 || Stride == -1)) + GEP && GEP->hasNoUnsignedSignedWrap()) return Stride; // If the null pointer is undefined, then an access sequence which would diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll index 81d8b01fe7fb7..0bdcc35790148 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -30,7 +30,6 @@ define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: -; CHECK-NEXT: {(4 + (8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw> ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; @@ -157,7 +156,6 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: -; CHECK-NEXT: {((8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw> ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll index b9951c7ed02ec..dd06cab26d095 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll @@ -105,7 +105,6 @@ define i32 @check_no_dep_via_bounds_compare_symbolic_max_btc_neg_1(ptr %P, i32 % ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: -; CHECK-NEXT: {(8 + (8 * %y) + %P),+,8}<%loop> Added Flags: <nusw> ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll index c110aa6a69659..9da3d8f3d2802 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll @@ -243,7 +243,7 @@ for.end: ; preds = %for.body ; LAA: Memory dependences are safe{{$}} ; LAA: SCEV assumptions: ; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw> -; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw> +; LAA-EMPTY: ; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul: ; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a) diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll index 4e75699c91773..98b89abfeafda 100644 --- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -11,15 +11,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p ; CHECK: for.body.lver.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]] -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; CHECK: for.body.lver.orig: @@ -27,7 +19,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p ; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] ; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 ; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64 -; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]] +; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]] ; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4 ; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]] ; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4 @@ -53,14 +45,14 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p ; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 ; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 ; CHECK-NEXT: 
[[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0 +; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]] ; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4 ; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] ; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 ; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 ; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3 +; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] ; CHECK: for.body.ph: @@ -83,7 +75,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit2: +; CHECK: for.end.loopexit1: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -144,15 +136,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n ; CHECK: for.body.lver.check: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]] -; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]] -; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; CHECK: for.body.lver.orig: @@ -186,14 +170,14 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n ; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 ; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 ; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5 +; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META5:![0-9]+]] ; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]] ; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4 ; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] ; CHECK-NEXT: [[ADD_LDIST1]] = add nuw 
nsw i64 [[IND_LDIST1]], 1 ; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 ; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8 +; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META8:![0-9]+]] ; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] ; CHECK: for.body.ph: @@ -216,7 +200,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END:%.*]] -; CHECK: for.end.loopexit2: +; CHECK: for.end.loopexit1: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll index 3fc1a15cbd9b2..5f13c8e9ac22e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -95,7 +95,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo } ; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2' -; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 +; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4 define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 { entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll index bc51caa63a20a..592b118f53207 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll @@ -279,36 +279,39 @@ for.cond.cleanup: ; preds = %for.inc, %entry ret void } - - define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 { ; CHECK-LABEL: @gather_nxv4i32_ind64_stride2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[N:%.*]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8 +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3 +; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i64 [[TMP7]], -1 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_VEC]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP7]], i64 [[N_VEC]] +; CHECK-NEXT: [[N_VEC1:%.*]] = sub i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3 -; 
CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], <vscale x 4 x i64> [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 4 x i64> [[TMP9]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) -; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]] +; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP9]], 5 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]] +; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]]) +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC1]]) +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC2]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4 @@ -316,14 +319,12 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 
[ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] @@ -334,7 +335,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias ; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 5aac001a8b9cf..fa69fefba515f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -19,26 +19,8 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1 ; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]] ; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 60 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr addrspace(13) [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 8 -; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]]) -; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(13) [[SCEVGEP]], i64 [[MUL_RESULT2]] -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr addrspace(13) [[TMP14]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16 +; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]] @@ -77,7 +59,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], 
label [[L44:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ] ; CHECK-NEXT: br label [[L26:%.*]] ; CHECK: L26: ; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[L26]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll index aebbcd526fc43..435da2c17d152 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -11,10 +12,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; runtime checks are needed, but with Assume=false getPtrStride cannot add ; runtime checks and as a result we can't create the interleave-group. ; -; FIXME: This is currently a missed optimization until we can use Assume=true -; with proper threshold checks. Once we do that the candidate interleave-group -; will not be invalidated by the wrapping checks. - ; #include ; void test(ptr __restrict__ out, ptr __restrict__ in, size_t size) ; { @@ -25,11 +22,52 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; } ; } -; CHECK: vector.body: -; CHECK-NOT: %wide.vec = load <8 x i32>, ptr {{.*}}, align 4 -; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> - define void @_Z4testPfS_m(ptr noalias nocapture %out, ptr noalias nocapture readonly %in, i64 %size) local_unnamed_addr { +; CHECK-LABEL: define void @_Z4testPfS_m( +; CHECK-SAME: ptr noalias nocapture [[OUT:%.*]], ptr noalias nocapture readonly [[IN:%.*]], i64 [[SIZE:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i64 [[SIZE]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SIZE]], 5 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[SIZE]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SIZE]], [[TMP1]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 [[DOTIDX]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[STRIDED_VEC]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[OUT_OFFSET_08:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX_IDX:%.*]] = shl i64 [[OUT_OFFSET_08]], 3 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[IN]], i64 [[ARRAYIDX_IDX]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[OUT_OFFSET_08]] +; CHECK-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[INC]] = add nuw i64 [[OUT_OFFSET_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[SIZE]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; entry: %cmp7 = icmp eq i64 %size, 0 br i1 %cmp7, label %for.cond.cleanup, label %for.body.preheader @@ -54,3 +92,9 @@ for.body: %exitcond = icmp eq i64 %inc, %size br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll index 53201e1a47870..2b5e06c8c948b 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-3.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -12,8 +13,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; result cannot prove the transformation is safe and therefore invalidate the ; candidate interleave group. ; -; FIXME: This is a missed optimization. Once we use Assume=true here, we will -; not have to invalidate the group. 
; void func(unsigned * __restrict a, unsigned * __restrict b, unsigned char x, unsigned char y) { ; int i = 0; @@ -22,11 +21,73 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; ; } -; CHECK: vector.body: -; CHECK-NOT: %wide.vec = load <8 x i32>, ptr {{.*}}, align 4 -; CHECK-NOT: shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> - define void @_Z4funcPjS_hh(ptr noalias nocapture readonly %a, ptr noalias nocapture %b, i8 zeroext %x, i8 zeroext %y) local_unnamed_addr { +; CHECK-LABEL: define void @_Z4funcPjS_hh( +; CHECK-SAME: ptr noalias nocapture readonly [[A:%.*]], ptr noalias nocapture [[B:%.*]], i8 zeroext [[X:%.*]], i8 zeroext [[Y:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP9:%.*]] = icmp eq i8 [[Y]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i8 [[Y]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i8 [[Y]], 5 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 +; CHECK-NEXT: [[MUL_RESULT:%.*]] = shl i8 [[TMP1]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[X]], -1 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[MUL_RESULT]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 127 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP8]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[TMP7]] +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i8 +; CHECK-NEXT: [[TMP6:%.*]] = shl i8 [[DOTCAST]], 1 +; CHECK-NEXT: [[IND_END:%.*]] = add i8 [[X]], [[TMP6]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i8 +; CHECK-NEXT: [[TMP9:%.*]] = shl i8 [[DOTCAST3]], 1 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[X]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = zext i8 [[OFFSET_IDX]] to i64 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shl <4 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP25]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[X]], 
%[[FOR_BODY_PREHEADER]] ], [ [[X]], %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] +; CHECK: [[FOR_COND_CLEANUP]]: +; CHECK-NEXT: ret void +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[INDEX_011:%.*]] = phi i8 [ [[ADD:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[INDEX_011]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[TMP27]], 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD]] = add i8 [[INDEX_011]], 2 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; entry: %cmp9 = icmp eq i8 %y, 0 br i1 %cmp9, label %for.cond.cleanup, label %for.body.preheader @@ -55,3 +116,9 @@ for.body: %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +;. 
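The two interleaved-accesses tests above show the user-visible effect of the getPtrStride change: with the unit-stride restriction gone, the stride-2 interleave groups are formed outright instead of being invalidated or guarded by wrapping checks (the inbounds GEPs in those tests imply nusw). As a rough sketch of the underlying argument -- hypothetical hand-written IR, not one of the tests in this patch -- consider a non-unit-stride access whose getelementptr carries the nusw flag:

; Sketch only. If the pointer AddRec {%p,+,8} wrapped the pointer index
; space, the nusw GEP of some iteration would be poison and the store
; depending on it would be immediate UB, so LAA may assume no wrap and
; report stride 2 for %gep without queueing an Added Flags predicate.
define void @nusw_stride2(ptr %p, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %off = shl nuw i64 %iv, 1                        ; index 2*iv
  %gep = getelementptr nusw i32, ptr %p, i64 %off
  store i32 0, ptr %gep
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}
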
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll index 38b64eec17d8d..892c518b6c873 100644 --- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll +++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll @@ -458,18 +458,7 @@ define void @f5(ptr noalias %a, ; LV-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP1]] ; LV-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] ; LV-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 -; LV-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] -; LV-NEXT: [[TMP8:%.*]] = sext i32 [[TMP1]] to i64 -; LV-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP8]], 1 -; LV-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP9]] -; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]]) -; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0 -; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1 -; LV-NEXT: [[TMP10:%.*]] = sub i64 0, [[MUL_RESULT3]] -; LV-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[TMP10]] -; LV-NEXT: [[TMP12:%.*]] = icmp ugt ptr [[TMP11]], [[SCEVGEP]] -; LV-NEXT: [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW4]] -; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP7]], [[TMP13]] +; LV-NEXT: [[TMP14:%.*]] = or i1 [[TMP5]], [[TMP6]] ; LV-NEXT: br i1 [[TMP14]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]] ; LV: for.body.ph.lver.orig: ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] @@ -477,7 +466,7 @@ define void @f5(ptr noalias %a, ; LV-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] ; LV-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ [[TRUNCN]], [[FOR_BODY_PH_LVER_ORIG]] ], [ [[DEC_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ] ; LV-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2 -; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[A]], i32 [[MUL_LVER_ORIG]] +; LV-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i32 [[MUL_LVER_ORIG]] ; LV-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXA_LVER_ORIG]], align 2 ; LV-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[IND_LVER_ORIG]] ; LV-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i16, ptr [[ARRAYIDXB_LVER_ORIG]], align 2 @@ -505,7 +494,7 @@ define void @f5(ptr noalias %a, ; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]] ; LV: for.end.loopexit: ; LV-NEXT: br label [[FOR_END:%.*]] -; LV: for.end.loopexit5: +; LV: for.end.loopexit2: ; LV-NEXT: br label [[FOR_END]] ; LV: for.end: ; LV-NEXT: ret void
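
Since LoopAccessAnalysis no longer queues a SCEVWrapPredicate ("Added Flags:") assumption for these pointers, LoopDistribute, LoopVersioning and LoopVectorize stop emitting the @llvm.umul.with.overflow.i64-based versioning checks deleted above. To inspect the analysis output that the LAA tests check (a suggested invocation, not part of the patch), print the access info on one of the test files and compare the "SCEV assumptions:" section before and after this change:

opt -passes='print<access-info>' -disable-output llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll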