diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 022530dc846ea..f4f66447d1c3d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -941,6 +941,7 @@ class TargetTransformInfo { const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, + bool ForPoisonSrc = true, ArrayRef VL = {}) const; /// Estimate the overhead of scalarizing an instructions unique diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 990252b1e5743..4808e7f76dfa3 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -453,7 +453,8 @@ class TargetTransformInfoImplBase { virtual InstructionCost getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const { return 0; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 6e2f65c01bf77..4558d51c3a643 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -871,7 +871,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// extracted from vectors. 
InstructionCost getScalarizationOverhead( VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override { + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override { /// FIXME: a bitfield is not a reasonable abstraction for talking about /// which elements are needed from a scalable vector if (isa(InTy)) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8548afea72964..3ced70e113bf7 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -630,9 +630,10 @@ bool TargetTransformInfo::isTargetIntrinsicWithStructReturnOverloadAtField( InstructionCost TargetTransformInfo::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc, + ArrayRef VL) const { return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, - CostKind, VL); + CostKind, ForPoisonSrc, VL); } InstructionCost TargetTransformInfo::getOperandsScalarizationOverhead( diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index fcc5eb1c05ba0..48700f98d5934 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3854,7 +3854,8 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I, InstructionCost AArch64TTIImpl::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc, + ArrayRef VL) const { if (isa(Ty)) return InstructionCost::getInvalid(); if (Ty->getElementType()->isFloatingPointTy()) diff --git 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index adfaec0ea618b..be6bca2225eac 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -462,7 +462,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { InstructionCost getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override; + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override; /// Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 182f9f5b55d9f..a9bd5a0d01043 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -112,7 +112,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase { InstructionCost getScalarizationOverhead( VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override { + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override { if (!InTy->getElementCount().isFixed()) return InstructionCost::getInvalid(); @@ -141,7 +142,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase { Insert = false; } return Cost + BaseT::getScalarizationOverhead(InTy, DemandedElts, Insert, - Extract, CostKind, VL); + Extract, CostKind, + ForPoisonSrc, VL); } void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 15cf909526257..aa99622896b6b 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -860,7 
+860,8 @@ static unsigned isM1OrSmaller(MVT VT) { InstructionCost RISCVTTIImpl::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc, + ArrayRef VL) const { if (isa(Ty)) return InstructionCost::getInvalid(); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index ca4c0ccd27a74..06f2297da6ad3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -168,7 +168,8 @@ class RISCVTTIImpl : public BasicTTIImplBase { InstructionCost getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override; + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override; InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index ee142ccd20e20..ede65009cec29 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -495,7 +495,8 @@ static bool isFreeEltLoad(Value *Op) { InstructionCost SystemZTTIImpl::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc, + ArrayRef VL) const { unsigned NumElts = cast(Ty)->getNumElements(); InstructionCost Cost = 0; @@ -517,7 +518,7 @@ InstructionCost SystemZTTIImpl::getScalarizationOverhead( } Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, - CostKind, VL); + CostKind, ForPoisonSrc, VL); return Cost; } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h 
b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index c83f8e2542470..0b1d797c41d1f 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -90,7 +90,8 @@ class SystemZTTIImpl : public BasicTTIImplBase { bool LSRWithInstrQueries() const override { return true; } InstructionCost getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override; + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override; bool supportsEfficientVectorElementLoadStore() const override { return true; } bool enableInterleavedAccessVectorization() const override { return true; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 74bb25781b534..0970de1e3ecc7 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4916,7 +4916,8 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, InstructionCost X86TTIImpl::getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL) const { + TTI::TargetCostKind CostKind, bool ForPoisonSrc, + ArrayRef VL) const { assert(DemandedElts.getBitWidth() == cast(Ty)->getNumElements() && "Vector size mismatch"); @@ -4935,7 +4936,24 @@ InstructionCost X86TTIImpl::getScalarizationOverhead( assert(NumLegalVectors >= 0 && "Negative cost!"); // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much - // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. + // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. SLPVectorizer has + // a special heuristic regarding poison input which is passed here in + // ForPoisonSrc. 
+ if (Insert && !ForPoisonSrc) { + // This is nearly identical to BaseT::getScalarizationOverhead(), except + // that BaseT passes nullptr to getVectorInstrCost() for Op0 (instead of + // Constant::getNullValue() as is done here), which makes the X86TTIImpl + // getVectorInstrCost() return 0 instead of 1. + for (unsigned I : seq(DemandedElts.getBitWidth())) { + if (!DemandedElts[I]) + continue; + Cost += getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, I, + Constant::getNullValue(Ty), + VL.empty() ? nullptr : VL[I]); + } + return Cost; + } + if (Insert) { if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || (MScalarTy.isInteger() && ST->hasSSE41()) || diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index be2ede504f322..da91d2b9b2053 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -170,7 +170,8 @@ class X86TTIImpl : public BasicTTIImplBase { Value *Op1) const override; InstructionCost getScalarizationOverhead( VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, - TTI::TargetCostKind CostKind, ArrayRef VL = {}) const override; + TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, + ArrayRef VL = {}) const override; InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 26b5982f5bb18..03bbec3aae082 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5738,26 +5738,8 @@ getScalarizationOverhead(const TargetTransformInfo &TTI, Type *ScalarTy, } return Cost; } - APInt NewDemandedElts = DemandedElts; - InstructionCost Cost = 0; - if (!ForPoisonSrc && Insert) { - // Handle insert into non-poison vector. 
- // TODO: Need to teach getScalarizationOverhead about insert elements into - // non-poison input vector to better handle such cases. Currently, it is - // very conservative and may "pessimize" the vectorization. - for (unsigned I : seq(DemandedElts.getBitWidth())) { - if (!DemandedElts[I]) - continue; - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, CostKind, - I, Constant::getNullValue(Ty), - VL.empty() ? nullptr : VL[I]); - } - NewDemandedElts.clearAllBits(); - } else if (!NewDemandedElts.isZero()) { - Cost += TTI.getScalarizationOverhead(Ty, NewDemandedElts, Insert, Extract, - CostKind, VL); - } - return Cost; + return TTI.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, + CostKind, ForPoisonSrc, VL); } /// This is similar to TargetTransformInfo::getVectorInstrCost, but if ScalarTy diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll index afe01d3cd673d..85b8157c949f1 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll @@ -114,13 +114,13 @@ define void @fun2(ptr %0, ptr %Dst) { ; CHECK: [[BB4]]: ; CHECK-NEXT: ret void ; CHECK: [[BB5]]: +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 24 +; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 16 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> , i64 [[TMP2]], i32 1 -; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8 ; CHECK-NEXT: br label %[[BB4]] ; -; Looks like there is bug in TTI, where insertion into index 1 is free, while insertion in to index 0 is 1. 
-; REMARK: Function: fun2 +; REMARK-NOT: Function: fun2 %3 = load i64, ptr %0, align 8 %4 = icmp eq i64 %3, 0 diff --git a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll index 043205822b1c5..0e20ec7d37e7e 100644 --- a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll @@ -1,35 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx < %s \ +; RUN: | FileCheck %s +; REQUIRES: x86-registered-target define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) { ; CHECK-LABEL: define void @test( -; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) { +; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TOP:.*:]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP0]], i64 20 ; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]] ; CHECK: [[L41]]: ; CHECK-NEXT: [[DOTNOT276:%.*]] = icmp eq ptr [[TMP2]], null -; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT276]], i32 0, i32 [[TMP10]] -; CHECK-NEXT: [[DOTNOT277:%.*]] = icmp eq ptr [[TMP12]], null -; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT276]], i32 0, i32 
[[TMP6]] +; CHECK-NEXT: [[DOTNOT277:%.*]] = icmp eq ptr [[TMP3]], null +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT277]], i32 0, i32 [[TMP8]] ; CHECK-NEXT: [[DOTNOT278:%.*]] = icmp eq ptr [[TMP4]], null -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[DOTNOT278]], i32 0, i32 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[DOTNOT278]], i32 0, i32 [[TMP10]] ; CHECK-NEXT: [[DOTNOT279:%.*]] = icmp eq ptr [[TMP5]], null -; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[DOTNOT279]], i32 0, i32 [[TMP20]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[DOTNOT279]], i32 0, i32 [[TMP12]] ; CHECK-NEXT: br label %[[L112:.*]] ; CHECK: [[L42]]: ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP14]], 0 ; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]] ; CHECK: [[L47]]: -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[DOTNOT282:%.*]] = icmp eq ptr [[TMP4]], null ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[DOTNOT282]], i32 0, i32 [[TMP16]] @@ -38,14 +40,14 @@ define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) { ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[DOTNOT283]], i32 0, i32 [[TMP18]] ; CHECK-NEXT: br label %[[L112]] ; CHECK: [[L112]]: -; CHECK-NEXT: [[TMP24:%.*]] = phi i32 [ [[TMP19]], %[[L47]] ], [ [[TMP25]], %[[L41]] ], [ 0, %[[L42]] ] -; CHECK-NEXT: [[TMP23:%.*]] = phi i32 [ [[TMP17]], %[[L47]] ], [ [[TMP11]], %[[L41]] ], [ [[TMP1]], %[[L42]] ] -; CHECK-NEXT: [[TMP22:%.*]] = phi i32 [ [[TMP13]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ 0, 
%[[L42]] ] -; CHECK-NEXT: [[TMP21:%.*]] = phi i32 [ 0, %[[L47]] ], [ [[TMP7]], %[[L41]] ], [ 0, %[[L42]] ] -; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[P1]], align 4 -; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4 +; CHECK-NEXT: [[VALUE_PHI13336:%.*]] = phi i32 [ [[TMP19]], %[[L47]] ], [ [[TMP13]], %[[L41]] ], [ 0, %[[L42]] ] +; CHECK-NEXT: [[VALUE_PHI12335:%.*]] = phi i32 [ [[TMP17]], %[[L47]] ], [ [[TMP11]], %[[L41]] ], [ [[TMP1]], %[[L42]] ] +; CHECK-NEXT: [[VALUE_PHI11334:%.*]] = phi i32 [ [[TMP15]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ 0, %[[L42]] ] +; CHECK-NEXT: [[VALUE_PHI10333:%.*]] = phi i32 [ 0, %[[L47]] ], [ [[TMP7]], %[[L41]] ], [ 0, %[[L42]] ] +; CHECK-NEXT: store i32 [[VALUE_PHI10333]], ptr [[P2]], align 4 +; CHECK-NEXT: store i32 [[VALUE_PHI11334]], ptr [[P1]], align 4 +; CHECK-NEXT: store i32 [[VALUE_PHI12335]], ptr [[P2]], align 4 +; CHECK-NEXT: store i32 [[VALUE_PHI13336]], ptr [[P1]], align 4 ; CHECK-NEXT: ret void ; top: