diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 0c4e5bb3d4721..969d225c6ef2e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1491,10 +1491,51 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { SmallVector Predicates; for (Instruction &I : *BB) { LoadInst *LI = dyn_cast(&I); + + // Make sure we can execute all computations feeding into Ptr in the loop + // w/o triggering UB and that none of the out-of-loop operands are poison. + // We do not need to check if operations inside the loop can produce + // poison due to flags (e.g. due to an inbounds GEP going out of bounds), + // because flags will be dropped when executing them unconditionally. + // TODO: Results could be improved by considering poison-propagation + // properties of visited ops. + auto CanSpeculatePointerOp = [this](Value *Ptr) { + SmallVector Worklist = {Ptr}; + SmallPtrSet Visited; + while (!Worklist.empty()) { + Value *CurrV = Worklist.pop_back_val(); + if (!Visited.insert(CurrV).second) + continue; + + auto *CurrI = dyn_cast(CurrV); + if (!CurrI || !TheLoop->contains(CurrI)) { + // If operands from outside the loop may be poison then Ptr may also + // be poison. + if (!isGuaranteedNotToBePoison(CurrV, AC, + TheLoop->getLoopPredecessor() + ->getTerminator() + ->getIterator())) + return false; + continue; + } + + // A loaded value may be poison, independent of any flags. + if (isa(CurrI) && !isGuaranteedNotToBePoison(CurrV, AC)) + return false; + + // For other ops, assume poison can only be introduced via flags, + // which can be dropped. + if (!isa(CurrI) && !isSafeToSpeculativelyExecute(CurrI)) + return false; + append_range(Worklist, CurrI->operands()); + } + return true; + }; // Pass the Predicates pointer to isDereferenceableAndAlignedInLoop so // that it will consider loops that need guarding by SCEV checks. The // vectoriser will generate these checks if we decide to vectorise. if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) && + CanSpeculatePointerOp(LI->getPointerOperand()) && isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC, &Predicates)) SafePointers.insert(LI->getPointerOperand()); diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 53fd2ed43972c..ff9cf682b6e9b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -666,34 +666,54 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] -; CHECK: [[PRED_SREM_IF]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] -; CHECK: [[PRED_SREM_CONTINUE]]: +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP3]], poison +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP24]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] -; CHECK: [[PRED_SREM_IF1]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] -; CHECK: [[PRED_SREM_CONTINUE2]]: +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], poison +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP11]] +; CHECK-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP8]], i8 [[TMP26]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i8> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] -; CHECK: [[PRED_SREM_IF3]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] -; CHECK: [[PRED_SREM_CONTINUE4]]: +; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK: [[PRED_LOAD_IF3]]: +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP29]], i8 [[TMP20]], i32 2 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK: [[PRED_LOAD_CONTINUE4]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP29]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] -; CHECK: [[PRED_SREM_IF5]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] -; CHECK: [[PRED_SREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], poison +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_IF5]]: +; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP30]], poison ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP13]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_CONTINUE6]]: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 @@ -743,34 +763,54 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SREM_CONTINUE6:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_SREM_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]] -; CHECK: [[PRED_SREM_IF]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]] -; CHECK: [[PRED_SREM_CONTINUE]]: +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[TMP9]], poison +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i8> poison, i8 [[TMP6]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i8> [ poison, %[[VECTOR_BODY]] ], [ [[TMP24]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]] -; CHECK: [[PRED_SREM_IF1]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]] -; CHECK: [[PRED_SREM_CONTINUE2]]: +; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP26]], poison +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP29]] +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i8> [[TMP25]], i8 [[TMP13]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 -; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SREM_IF3:.*]], label %[[PRED_SREM_CONTINUE4:.*]] -; CHECK: [[PRED_SREM_IF3]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE4]] -; CHECK: [[PRED_SREM_CONTINUE4]]: +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK: [[PRED_LOAD_IF3]]: +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], poison +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP20]], i32 2 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK: [[PRED_LOAD_CONTINUE4]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i8> [ [[TMP15]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], %[[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 -; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_SREM_IF5:.*]], label %[[PRED_SREM_CONTINUE6]] -; CHECK: [[PRED_SREM_IF5]]: -; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE6]] -; CHECK: [[PRED_SREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], poison +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_IF5]]: +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP31]], poison ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 +; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP8]], align 1 +; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_CONTINUE6]]: +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index dfae2d3f41d48..7a54519c7cdf8 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -3,9 +3,9 @@ declare void @llvm.assume(i1) -define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -103,9 +103,9 @@ exit: ret void } -define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -179,9 +179,9 @@ exit: ret void } -define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_too_small_in_header_constant_trip_count( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -279,9 +279,9 @@ exit: ret void } -define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_align_1( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -479,9 +479,9 @@ exit: ret void } -define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_constant_trip_count_align_not_known( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -579,9 +579,9 @@ exit: ret void } -define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_then_constant_trip_count( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -675,9 +675,9 @@ exit: ret void } -define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_latch_constant_trip_count( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -777,9 +777,9 @@ exit: ret void } -define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) nofree nosync{ +define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %N) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_header_variable_trip_count( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -881,9 +881,9 @@ exit: ret void } -define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_preheader_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_1( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -958,9 +958,9 @@ exit: ret void } -define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 3999) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1053,9 +1053,9 @@ exit: ret void } -define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @align_and_deref_assumption_in_preheader_constant_trip_count_align_4( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1131,9 +1131,9 @@ exit: } -define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via_argument_attr(ptr noalias align 4 %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via_argument_attr(ptr noalias noundef align 4 %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via_argument_attr( -; CHECK-SAME: ptr noalias align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef align 4 [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1145,9 +1145,9 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_known_via ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 @@ -1208,9 +1208,9 @@ exit: ret void } -define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 4000) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1303,9 +1303,9 @@ exit: ret void } -define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4(ptr noalias %a, ptr noalias %b, ptr noalias %c) nofree nosync{ +define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) nofree nosync{ ; CHECK-LABEL: define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 3999) ] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] @@ -1399,9 +1399,9 @@ exit: } ; %a may be freed between the dereferenceable assumption and accesses. -define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c) { ; CHECK-LABEL: define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_invariant_ptr( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 4) ] ; CHECK-NEXT: call void @may_free() diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index 6b46197295e32..ee65dc8cdcb1d 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -4,9 +4,9 @@ declare void @llvm.assume(i1) ; %a is known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { ; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i8( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 @@ -86,9 +86,9 @@ exit: } ; %a is known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { ; CHECK-LABEL: define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i64 [[N]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[MUL]]) ] @@ -171,9 +171,9 @@ exit: ; %a is NOT known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { ; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 [[N]]) ] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 @@ -253,9 +253,9 @@ exit: } ; %a is NOT known dereferenceable via assume for the whole loop. -define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { +define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32(ptr noalias noundef %a, ptr noalias %b, ptr noalias %c, i64 %n) nofree nosync { ; CHECK-LABEL: define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_access_i32( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-SAME: ptr noalias noundef [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 4), "dereferenceable"(ptr [[A]], i64 100) ] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll index e27734755dfb2..3373c6d5cb81a 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll @@ -7,7 +7,7 @@ target datalayout="p:16:16" ; Test cases for https://github.com/llvm/llvm-project/issues/142957 -; FIXME: Cannot speculatively execute %l, because %div may trigger UB and must be +; Cannot speculatively execute %l, because %div may trigger UB and must be ; predicated. define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-LABEL: define void @ptr_depends_on_sdiv( @@ -26,36 +26,40 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] ; CHECK: [[PRED_SDIV_IF]]: ; CHECK-NEXT: [[TMP2:%.*]] = sdiv i16 24316, [[OFF]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i32 0 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] ; CHECK: [[PRED_SDIV_CONTINUE]]: -; CHECK-NEXT: [[TMP3:%.*]] = phi i16 [ poison, %[[VECTOR_BODY]] ], [ [[TMP2]], %[[PRED_SDIV_IF]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP3]], %[[PRED_SDIV_IF]] ] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] ; CHECK: [[PRED_SDIV_IF1]]: ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i16 24316, [[OFF]] +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i16> [[TMP4]], i16 [[TMP18]], i32 1 ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] ; CHECK: [[PRED_SDIV_CONTINUE2]]: -; CHECK-NEXT: [[TMP13:%.*]] = add i16 [[OFFSET_IDX]], 16383 -; CHECK-NEXT: [[TMP7:%.*]] = shl i16 [[TMP3]], 14 -; CHECK-NEXT: [[TMP8:%.*]] = sub i16 [[TMP13]], [[TMP7]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr @src, i16 [[TMP8]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP20]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP4]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP7]], %[[PRED_SDIV_IF1]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 16383) +; CHECK-NEXT: [[TMP22:%.*]] = shl <2 x i16> [[TMP8]], splat (i16 14) +; CHECK-NEXT: [[TMP23:%.*]] = sub <2 x i16> [[TMP21]], [[TMP22]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 ; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]] +; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 ; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_IF3]]: +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]] +; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1 ; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: @@ -94,15 +98,17 @@ exit: ret void } -; FIXME: Cannot speculatively execute %l, because %off may be poison. +; Cannot speculatively execute %l, because %off may be poison. define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-LABEL: define void @ptr_depends_on_possibly_poison_value( ; CHECK-SAME: ptr noalias [[DST:%.*]], i16 [[OFF:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP0:%.*]] = sub i16 1, [[OFF]] -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], [[OFF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[OFF]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] @@ -110,25 +116,26 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) -; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr @src, i16 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP1]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]] +; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: @@ -237,7 +244,7 @@ exit: ret void } -; FIXME: Cannot speculatively execute %l, because %off may be poison. +; Cannot speculatively execute %l, because %off may be poison. define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK-LABEL: define void @ptr_depends_on_possibly_poison_value_from_load( ; CHECK-SAME: ptr noalias [[DST:%.*]]) { @@ -251,28 +258,31 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 9, [[DOTCAST]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @src, align 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[VEC_IND]], splat (i16 10) -; CHECK-NEXT: [[TMP2:%.*]] = sub i16 1, [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = add i16 [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr @src, i16 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i16> splat (i16 1), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i16> [[TMP2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i16> [[VEC_IND]], [[TMP3]] ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]] +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_IF1]]: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]] +; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: