
Commit e6dd9c8

fhahn authored and aokblast committed
Revert "[VPlan] Run narrowInterleaveGroups during general VPlan optimizations. (llvm#149706)"
This reverts commit 8d29d09. There have been reports of mis-compiles in llvm#149706. Revert while I investigate.
1 parent aaa6fc7 · commit e6dd9c8

13 files changed: +192 −382 lines

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 4 deletions
@@ -7231,6 +7231,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
     return DenseMap<const SCEV *, Value *>();
   }
 
+  VPlanTransforms::narrowInterleaveGroups(
+      BestVPlan, BestVF,
+      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
   VPlanTransforms::removeDeadRecipes(BestVPlan);
 
   VPlanTransforms::convertToConcreteRecipes(BestVPlan);
@@ -8199,10 +8202,6 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
     if (CM.foldTailWithEVL())
       VPlanTransforms::runPass(VPlanTransforms::addExplicitVectorLength,
                                *Plan, CM.getMaxSafeElements());
-
-    if (auto P = VPlanTransforms::narrowInterleaveGroups(*Plan, TTI))
-      VPlans.push_back(std::move(P));
-
     assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
     VPlans.push_back(std::move(Plan));
   }

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 1 deletion
@@ -1191,7 +1191,6 @@ VPlan *VPlan::duplicate() {
   }
   Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
   Old2NewVPValues[&VF] = &NewPlan->VF;
-  Old2NewVPValues[&UF] = &NewPlan->UF;
   Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
   if (BackedgeTakenCount) {
     NewPlan->BackedgeTakenCount = new VPValue();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 12 deletions
@@ -4152,9 +4152,6 @@ class VPlan {
   /// Represents the vectorization factor of the loop.
   VPValue VF;
 
-  /// Represents the symbolic unroll factor of the loop.
-  VPValue UF;
-
   /// Represents the loop-invariant VF * UF of the vector loop region.
   VPValue VFxUF;
 
@@ -4308,9 +4305,6 @@ class VPlan {
   VPValue &getVF() { return VF; };
   const VPValue &getVF() const { return VF; };
 
-  /// Returns the symbolic UF of the vector loop region.
-  VPValue &getSymbolicUF() { return UF; };
-
   /// Returns VF * UF of the vector loop region.
   VPValue &getVFxUF() { return VFxUF; }
 
@@ -4320,12 +4314,6 @@ class VPlan {
 
   void addVF(ElementCount VF) { VFs.insert(VF); }
 
-  /// Remove \p VF from the plan.
-  void removeVF(ElementCount VF) {
-    assert(hasVF(VF) && "tried to remove VF not present in plan");
-    VFs.remove(VF);
-  }
-
   void setVF(ElementCount VF) {
     assert(hasVF(VF) && "Cannot set VF not already in plan");
     VFs.clear();

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 41 additions & 75 deletions
@@ -3956,9 +3956,6 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
   // used.
   // TODO: Assert that they aren't used.
 
-  VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
-  Plan.getSymbolicUF().replaceAllUsesWith(UF);
-
   // If there are no users of the runtime VF, compute VFxUF by constant folding
   // the multiplication of VF and UF.
   if (VF.getNumUsers() == 0) {
@@ -3978,6 +3975,7 @@ void VPlanTransforms::materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
   }
   VF.replaceAllUsesWith(RuntimeVF);
 
+  VPValue *UF = Plan.getOrAddLiveIn(ConstantInt::get(TCTy, Plan.getUF()));
   VPValue *MulByUF = Builder.createNaryOp(Instruction::Mul, {RuntimeVF, UF});
   VFxUF.replaceAllUsesWith(MulByUF);
 }
@@ -4045,14 +4043,14 @@ static bool canNarrowLoad(VPWidenRecipe *WideMember0, unsigned OpIdx,
   return false;
 }
 
-/// Returns VF from \p VFs if \p IR is a full interleave group with factor and
-/// number of members both equal to VF. The interleave group must also access
-/// the full vector width.
-static std::optional<ElementCount> isConsecutiveInterleaveGroup(
-    VPInterleaveRecipe *InterleaveR, ArrayRef<ElementCount> VFs,
-    VPTypeAnalysis &TypeInfo, const TargetTransformInfo &TTI) {
+/// Returns true if \p IR is a full interleave group with factor and number of
+/// members both equal to \p VF. The interleave group must also access the full
+/// vector width \p VectorRegWidth.
+static bool isConsecutiveInterleaveGroup(VPInterleaveRecipe *InterleaveR,
+                                         unsigned VF, VPTypeAnalysis &TypeInfo,
+                                         unsigned VectorRegWidth) {
   if (!InterleaveR || InterleaveR->getMask())
-    return std::nullopt;
+    return false;
 
   Type *GroupElementTy = nullptr;
   if (InterleaveR->getStoredValues().empty()) {
@@ -4061,35 +4059,21 @@ static std::optional<ElementCount> isConsecutiveInterleaveGroup(
                 [&TypeInfo, GroupElementTy](VPValue *Op) {
                   return TypeInfo.inferScalarType(Op) == GroupElementTy;
                 }))
-      return std::nullopt;
+      return false;
   } else {
     GroupElementTy =
         TypeInfo.inferScalarType(InterleaveR->getStoredValues()[0]);
     if (!all_of(InterleaveR->getStoredValues(),
                 [&TypeInfo, GroupElementTy](VPValue *Op) {
                   return TypeInfo.inferScalarType(Op) == GroupElementTy;
                 }))
-      return std::nullopt;
+      return false;
   }
 
-  auto GetVectorWidthForVF = [&TTI](ElementCount VF) {
-    TypeSize Size = TTI.getRegisterBitWidth(
-        VF.isFixed() ? TargetTransformInfo::RGK_FixedWidthVector
-                     : TargetTransformInfo::RGK_ScalableVector);
-    assert(Size.isScalable() == VF.isScalable() &&
-           "if Size is scalable, VF must to and vice versa");
-    return Size.getKnownMinValue();
-  };
-
-  for (ElementCount VF : VFs) {
-    unsigned MinVal = VF.getKnownMinValue();
-    unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * MinVal;
-    auto IG = InterleaveR->getInterleaveGroup();
-    if (IG->getFactor() == MinVal && IG->getNumMembers() == MinVal &&
-        GroupSize == GetVectorWidthForVF(VF))
-      return {VF};
-  }
-  return std::nullopt;
+  unsigned GroupSize = GroupElementTy->getScalarSizeInBits() * VF;
+  auto IG = InterleaveR->getInterleaveGroup();
+  return IG->getFactor() == VF && IG->getNumMembers() == VF &&
+         GroupSize == VectorRegWidth;
 }
 
 /// Returns true if \p VPValue is a narrow VPValue.
@@ -4100,18 +4084,16 @@ static bool isAlreadyNarrow(VPValue *VPV) {
   return RepR && RepR->isSingleScalar();
 }
 
-std::unique_ptr<VPlan>
-VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
-                                        const TargetTransformInfo &TTI) {
-  using namespace llvm::VPlanPatternMatch;
+void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+                                             unsigned VectorRegWidth) {
  VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
-
  if (!VectorLoop)
-    return nullptr;
+    return;
 
  VPTypeAnalysis TypeInfo(Plan);
+
+  unsigned VFMinVal = VF.getKnownMinValue();
  SmallVector<VPInterleaveRecipe *> StoreGroups;
-  std::optional<ElementCount> VFToOptimize;
  for (auto &R : *VectorLoop->getEntryBasicBlock()) {
    if (isa<VPCanonicalIVPHIRecipe>(&R) || match(&R, m_BranchOnCount()))
      continue;
@@ -4125,33 +4107,30 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
    // * recipes writing to memory except interleave groups
    // Only support plans with a canonical induction phi.
    if (R.isPhi())
-      return nullptr;
+      return;
 
    auto *InterleaveR = dyn_cast<VPInterleaveRecipe>(&R);
    if (R.mayWriteToMemory() && !InterleaveR)
-      return nullptr;
+      return;
+
+    // Do not narrow interleave groups if there are VectorPointer recipes and
+    // the plan was unrolled. The recipe implicitly uses VF from
+    // VPTransformState.
+    // TODO: Remove restriction once the VF for the VectorPointer offset is
+    // modeled explicitly as operand.
+    if (isa<VPVectorPointerRecipe>(&R) && Plan.getUF() > 1)
+      return;
 
    // All other ops are allowed, but we reject uses that cannot be converted
    // when checking all allowed consumers (store interleave groups) below.
    if (!InterleaveR)
      continue;
 
-    // Try to find a single VF, where all interleave groups are consecutive and
-    // saturate the full vector width. If we already have a candidate VF, check
-    // if it is applicable for the current InterleaveR, otherwise look for a
-    // suitable VF across the Plans VFs.
-    //
-    if (VFToOptimize) {
-      if (!isConsecutiveInterleaveGroup(InterleaveR, {*VFToOptimize}, TypeInfo,
-                                        TTI))
-        return nullptr;
-    } else {
-      if (auto VF = isConsecutiveInterleaveGroup(
-              InterleaveR, to_vector(Plan.vectorFactors()), TypeInfo, TTI))
-        VFToOptimize = *VF;
-      else
-        return nullptr;
-    }
+    // Bail out on non-consecutive interleave groups.
+    if (!isConsecutiveInterleaveGroup(InterleaveR, VFMinVal, TypeInfo,
+                                      VectorRegWidth))
+      return;
+
    // Skip read interleave groups.
    if (InterleaveR->getStoredValues().empty())
      continue;
@@ -4185,34 +4164,24 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
    auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>(
        InterleaveR->getStoredValues()[0]->getDefiningRecipe());
    if (!WideMember0)
-      return nullptr;
+      return;
    for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) {
      auto *R = dyn_cast_or_null<VPWidenRecipe>(V->getDefiningRecipe());
      if (!R || R->getOpcode() != WideMember0->getOpcode() ||
          R->getNumOperands() > 2)
-        return nullptr;
+        return;
      if (any_of(enumerate(R->operands()),
                 [WideMember0, Idx = I](const auto &P) {
                   const auto &[OpIdx, OpV] = P;
                   return !canNarrowLoad(WideMember0, OpIdx, OpV, Idx);
                 }))
-        return nullptr;
+        return;
    }
    StoreGroups.push_back(InterleaveR);
  }
 
  if (StoreGroups.empty())
-    return nullptr;
-
-  // All interleave groups in Plan can be narrowed for VFToOptimize. Split the
-  // original Plan into 2: a) a new clone which contains all VFs of Plan, except
-  // VFToOptimize, and b) the original Plan with VFToOptimize as single VF.
-  std::unique_ptr<VPlan> NewPlan;
-  if (size(Plan.vectorFactors()) != 1) {
-    NewPlan = std::unique_ptr<VPlan>(Plan.duplicate());
-    Plan.setVF(*VFToOptimize);
-    NewPlan->removeVF(*VFToOptimize);
-  }
+    return;
 
  // Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
  SmallPtrSet<VPValue *, 4> NarrowedOps;
@@ -4283,8 +4252,9 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
  auto *Inc = cast<VPInstruction>(CanIV->getBackedgeValue());
  VPBuilder PHBuilder(Plan.getVectorPreheader());
 
-  VPValue *UF = &Plan.getSymbolicUF();
-  if (VFToOptimize->isScalable()) {
+  VPValue *UF = Plan.getOrAddLiveIn(
+      ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF()));
+  if (VF.isScalable()) {
    VPValue *VScale = PHBuilder.createElementCount(
        CanIV->getScalarType(), ElementCount::getScalable(1));
    VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF});
@@ -4296,10 +4266,6 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
        Plan.getOrAddLiveIn(ConstantInt::get(CanIV->getScalarType(), 1)));
  }
  removeDeadRecipes(Plan);
-  assert(none_of(*VectorLoop->getEntryBasicBlock(),
-                 IsaPred<VPVectorPointerRecipe>) &&
-         "All VPVectorPointerRecipes should have been removed");
-  return NewPlan;
 }
 
 /// Add branch weight metadata, if the \p Plan's middle block is terminated by a
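For intuition, the restored isConsecutiveInterleaveGroup boils down to a width check: a group qualifies only if its factor and member count both equal VF and the group exactly fills one vector register. Below is a minimal self-contained sketch of that arithmetic, assuming the values the AArch64 tests further down exercise (VF = 2, 64-bit double elements, 128-bit NEON registers); the helper name is ours, not LLVM API:

#include <cassert>

// Sketch of the restored qualification test: a factor-VF group of VF members
// qualifies only when its members jointly fill one vector register. The
// concrete values are assumptions for the AArch64 tests (VF = 2, double
// elements, 128-bit NEON registers), not taken from the committed code.
static bool qualifies(unsigned Factor, unsigned NumMembers, unsigned ElemBits,
                      unsigned VF, unsigned VectorRegWidth) {
  unsigned GroupSize = ElemBits * VF; // GroupElementTy bit-size times VF
  return Factor == VF && NumMembers == VF && GroupSize == VectorRegWidth;
}

int main() {
  // The { double, double } group from the test: factor 2, 2 members, 64-bit
  // elements, VF = 2 -> 128 bits, exactly one 128-bit NEON register.
  assert(qualifies(/*Factor=*/2, /*NumMembers=*/2, /*ElemBits=*/64,
                   /*VF=*/2, /*VectorRegWidth=*/128));
  // A factor-3 group of the same element type would not saturate the
  // register and is rejected.
  assert(!qualifies(3, 3, 64, 2, 128));
  return 0;
}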

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 8 additions & 14 deletions
@@ -341,20 +341,14 @@ struct VPlanTransforms {
   static DenseMap<const SCEV *, Value *> expandSCEVs(VPlan &Plan,
                                                      ScalarEvolution &SE);
 
-  /// Try to find a single VF among \p Plan's VFs for which all interleave
-  /// groups (with known minimum VF elements) can be replaced by wide loads and
-  /// stores processing VF elements, if all transformed interleave groups access
-  /// the full vector width (checked via the maximum vector register width). If
-  /// the transformation can be applied, the original \p Plan will be split in
-  /// 2:
-  /// 1. The original Plan with the single VF containing the optimized recipes
-  ///    using wide loads instead of interleave groups.
-  /// 2. A new clone which contains all VFs of Plan except the optimized VF.
-  ///
-  /// This effectively is a very simple form of loop-aware SLP, where we use
-  /// interleave groups to identify candidates.
-  static std::unique_ptr<VPlan>
-  narrowInterleaveGroups(VPlan &Plan, const TargetTransformInfo &TTI);
+  /// Try to convert a plan with interleave groups with VF elements to a plan
+  /// with the interleave groups replaced by wide loads and stores processing VF
+  /// elements, if all transformed interleave groups access the full vector
+  /// width (checked via \p VectorRegWidth). This effectively is a very simple
+  /// form of loop-aware SLP, where we use interleave groups to identify
+  /// candidates.
+  static void narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
+                                     unsigned VectorRegWidth);
 
   /// Predicate and linearize the control-flow in the only loop region of
   /// \p Plan. If \p FoldTail is true, create a mask guarding the loop
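To make the doc comment concrete, here is roughly the source shape whose interleave groups the transform narrows, reconstructed from the AArch64 test below; the struct layout, operation, and trip count are read off the CHECK lines, while the function and type names are made up:

// Reconstructed source shape for the AArch64 test that follows (hypothetical
// names; layout and trip count inferred from the CHECK lines). Each iteration
// touches both members of a consecutive { double, double } pair -- exactly
// the pattern the vectorizer models as a factor-2 interleave group, and the
// candidate this "loop-aware SLP" transform looks for.
struct Pair {
  double a, b;
};

void add_pairs(Pair *res, const Pair *A, double x) {
  for (int i = 0; i < 100; ++i) {
    res[i].a = A[i].a + x; // member 0 of the interleave group
    res[i].b = A[i].b + x; // member 1 of the interleave group
  }
}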

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-constant-ops.ll

Lines changed: 32 additions & 12 deletions
@@ -175,18 +175,28 @@ define void @test_add_double_same_var_args_1(ptr %res, ptr noalias %A, ptr noali
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[STRIDED_VEC3]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[STRIDED_VEC1]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[STRIDED_VEC4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP0]]
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -227,18 +237,28 @@ define void @test_add_double_same_var_args_2(ptr %res, ptr noalias %A, ptr noali
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[A]], i64 [[TMP0]]
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = load <2 x double>, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = load <2 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x double>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <4 x double> [[WIDE_VEC]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[WIDE_VEC2:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = shufflevector <4 x double> [[WIDE_VEC2]], <4 x double> poison, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], [[STRIDED_VEC4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RES]], i64 [[TMP0]]
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    store <2 x double> [[TMP6]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+; CHECK-NEXT:    store <4 x double> [[INTERLEAVED_VEC5]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
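The restored CHECK lines perform a factor-2 de-interleave on the load side and a re-interleave on the store side purely via shufflevector masks (the "-" lines were the narrowed form that accessed each member stream directly). A small model of those masks on plain arrays, with lane movements copied from the masks above and helper names of our own choosing:

#include <array>

// Models the shuffle masks in the restored CHECK lines on plain arrays.
// A <4 x double> holds an interleaved pair stream {a0, b0, a1, b1}.
using V4 = std::array<double, 4>;
using V2 = std::array<double, 2>;

// mask <i32 0, i32 2>: even lanes -> {a0, a1} (member 0 of the group)
V2 deinterleaveEven(const V4 &W) { return {W[0], W[2]}; }
// mask <i32 1, i32 3>: odd lanes -> {b0, b1} (member 1 of the group)
V2 deinterleaveOdd(const V4 &W) { return {W[1], W[3]}; }

// The store side first concatenates the two halves (mask <0, 1, 2, 3>) and
// then re-interleaves with mask <0, 2, 1, 3>:
// {a0, a1, b0, b1} -> {a0, b0, a1, b1}.
V4 interleave(const V2 &A, const V2 &B) {
  V4 Concat = {A[0], A[1], B[0], B[1]};
  return {Concat[0], Concat[2], Concat[1], Concat[3]};
}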
