From c6de35083de9ff473fd47062c201f25fe3af6c67 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 9 Jan 2025 18:46:33 +0000 Subject: [PATCH] [Loads] Check loop-varying pointer in isDereferenceableAndAlignedInLoop. If the load executes in a successor of the header, check if the loop-varying pointer is dereferenceable and aligned the branch in the header. This is stricter than necessary and we could instead look for any block in the loop that executes unconditionally and post-dominates the block with the access. Also moves up the assumption check to make sure it is done for each pointer in the chain. --- llvm/lib/Analysis/Loads.cpp | 63 ++++++----- ...able-info-from-assumption-constant-size.ll | 101 +++++------------- 2 files changed, 63 insertions(+), 101 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 7bbd469bd035d..ea3057a738e49 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -47,6 +47,31 @@ static bool isDereferenceableAndAlignedPointer( if (!Visited.insert(V).second) return false; + if (CtxI) { + /// Look through assumes to see if both dereferencability and alignment can + /// be proven by an assume if needed. + RetainedKnowledge AlignRK; + RetainedKnowledge DerefRK; + bool IsAligned = V->getPointerAlignment(DL) >= Alignment; + if (getKnowledgeForValue( + V, {Attribute::Dereferenceable, Attribute::Alignment}, AC, + [&](RetainedKnowledge RK, Instruction *Assume, auto) { + if (!isValidAssumeForContext(Assume, CtxI, DT)) + return false; + if (RK.AttrKind == Attribute::Alignment) + AlignRK = std::max(AlignRK, RK); + if (RK.AttrKind == Attribute::Dereferenceable) + DerefRK = std::max(DerefRK, RK); + IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); + if (IsAligned && DerefRK && + DerefRK.ArgValue >= Size.getZExtValue()) + return true; // We have found what we needed so we stop looking + return false; // Other assumes may have better information. so + // keep looking + })) + return true; + } + // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. @@ -168,31 +193,6 @@ static bool isDereferenceableAndAlignedPointer( Size, DL, CtxI, AC, DT, TLI, Visited, MaxDepth); - if (CtxI) { - /// Look through assumes to see if both dereferencability and alignment can - /// be proven by an assume if needed. - RetainedKnowledge AlignRK; - RetainedKnowledge DerefRK; - bool IsAligned = V->getPointerAlignment(DL) >= Alignment; - if (getKnowledgeForValue( - V, {Attribute::Dereferenceable, Attribute::Alignment}, AC, - [&](RetainedKnowledge RK, Instruction *Assume, auto) { - if (!isValidAssumeForContext(Assume, CtxI, DT)) - return false; - if (RK.AttrKind == Attribute::Alignment) - AlignRK = std::max(AlignRK, RK); - if (RK.AttrKind == Attribute::Dereferenceable) - DerefRK = std::max(DerefRK, RK); - IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); - if (IsAligned && DerefRK && - DerefRK.ArgValue >= Size.getZExtValue()) - return true; // We have found what we needed so we stop looking - return false; // Other assumes may have better information. so - // keep looking - })) - return true; - } - // If we don't know, assume the worst. return false; } @@ -290,6 +290,19 @@ bool llvm::isDereferenceableAndAlignedInLoop( return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, HeaderFirstNonPHI, AC, &DT); + // If the load executes in a successor of the header, check if the + // loop-varying pointer is dereferenceable and aligned at the branch in the + // header. This is stricter than necessary and we could instead look for any + // block in the loop that executes unconditionally and post-dominates the + // block with the access. + if (LI->getParent() != L->getHeader() && + L->getExitingBlock() == L->getLoopLatch() && + isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, + L->getHeader()->getTerminator(), AC, + &DT)) { + return true; + } + // Otherwise, check to see if we have a repeating access pattern where we can // prove that all accesses are well aligned and dereferenceable. auto *AddRec = dyn_cast(SE.getSCEV(Ptr)); diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index 815e6bce52c0a..d9017433c5883 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -11,8 +11,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 @@ -23,25 +23,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias %a, ptr ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0 @@ -290,8 +273,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 @@ -302,25 +285,8 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP15]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 @@ -606,17 +572,17 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias %a, ptr no ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] @@ -703,24 +669,24 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias %a, ptr n ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] ; CHECK: [[PRED_LOAD_CONTINUE]]: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP9]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP16]], i64 4), "dereferenceable"(ptr [[TMP18]], i64 4) ] ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP29]], i64 4), "dereferenceable"(ptr [[TMP19]], i64 4) ] @@ -796,8 +762,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 @@ -808,25 +774,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias %a, ptr ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] -; CHECK: [[PRED_LOAD_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] -; CHECK: [[PRED_LOAD_CONTINUE]]: -; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP11]], %[[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] -; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 0