Skip to content

Commit fad25b7

Browse files
committed
[VPlan] Explicitly unpack vector operands used by replicate recipes.
1 parent aacbe23 commit fad25b7

File tree

52 files changed

+1454
-1410
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

52 files changed

+1454
-1410
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
971971
StepVector,
972972

973973
Pack,
974+
Unpack,
974975

975976
};
976977

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
101101
return Type::getIntNTy(Ctx, 32);
102102
case Instruction::PHI:
103103
case VPInstruction::Pack:
104+
case VPInstruction::Unpack:
104105
// Infer the type of first operand only, as other operands of header phi's
105106
// may lead to infinite recursion.
106107
return inferScalarType(R->getOperand(0));
@@ -442,7 +443,8 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
442443
break;
443444
for (VPRecipeBase &R : *VPBB) {
444445
if (isa<VPInstruction>(&R) &&
445-
cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Pack)
446+
(cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Pack ||
447+
cast<VPInstruction>(&R)->getOpcode() == VPInstruction::Unpack))
446448
continue;
447449
Idx2Recipe.push_back(&R);
448450

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,8 @@ VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
418418
}
419419

420420
bool VPInstruction::doesGeneratePerAllLanes() const {
421-
return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
421+
return (Opcode == VPInstruction::PtrAdd || Opcode == VPInstruction::Unpack) &&
422+
!vputils::onlyFirstLaneUsed(this);
422423
}
423424

424425
bool VPInstruction::canGenerateScalarForFirstLane() const {
@@ -438,6 +439,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
438439
case VPInstruction::PtrAdd:
439440
case VPInstruction::ExplicitVectorLength:
440441
case VPInstruction::AnyOf:
442+
case VPInstruction::Unpack:
441443
return true;
442444
default:
443445
return false;
@@ -448,10 +450,17 @@ Value *VPInstruction::generatePerLane(VPTransformState &State,
448450
const VPLane &Lane) {
449451
IRBuilderBase &Builder = State.Builder;
450452

451-
assert(getOpcode() == VPInstruction::PtrAdd &&
452-
"only PtrAdd opcodes are supported for now");
453-
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
454-
State.get(getOperand(1), Lane), Name);
453+
switch (getOpcode()) {
454+
case VPInstruction::PtrAdd:
455+
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
456+
State.get(getOperand(1), Lane), Name);
457+
458+
case VPInstruction::Unpack: {
459+
Value *LaneV = Lane.getAsRuntimeExpr(State.Builder, State.VF);
460+
return Builder.CreateExtractElement(State.get(getOperand(0)), LaneV);
461+
}
462+
}
463+
llvm_unreachable("all supported opcodes must be handled above");
455464
}
456465

457466
/// Create a conditional branch using \p Cond branching to the successors of \p
@@ -775,6 +784,11 @@ Value *VPInstruction::generate(VPTransformState &State) {
775784
State.packScalarIntoVectorizedValue(getOperand(0), WideValue, Lane);
776785
return WideValue;
777786
}
787+
case VPInstruction::Unpack: {
788+
assert(vputils::onlyFirstLaneUsed(this) &&
789+
"can only generate first lane for Unpack");
790+
return generatePerLane(State, VPLane(0));
791+
}
778792
default:
779793
llvm_unreachable("Unsupported opcode for instruction");
780794
}
@@ -934,6 +948,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
934948
case VPInstruction::StepVector:
935949
case VPInstruction::ReductionStartVector:
936950
case VPInstruction::Pack:
951+
case VPInstruction::Unpack:
937952
return false;
938953
default:
939954
return true;
@@ -1077,6 +1092,10 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10771092
case VPInstruction::Pack:
10781093
O << "pack-into-vector";
10791094
break;
1095+
case VPInstruction::Unpack:
1096+
O << "unpack-into-scalars";
1097+
break;
1098+
10801099
default:
10811100
O << Instruction::getOpcodeName(getOpcode());
10821101
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1927,6 +1927,27 @@ static void materializePack(VPlan &Plan) {
19271927
cast<VPInstruction>(&R)->doesGeneratePerAllLanes()))
19281928
continue;
19291929
auto *Def = cast<VPSingleDefRecipe>(&R);
1930+
for (auto *Op : to_vector(Def->operands())) {
1931+
VPRecipeBase *OpDef = Op->getDefiningRecipe();
1932+
if (!OpDef || isa<VPReplicateRecipe>(OpDef) ||
1933+
vputils::isSingleScalar(Op) ||
1934+
(isa<VPInstruction>(OpDef) &&
1935+
cast<VPInstruction>(OpDef)->doesGeneratePerAllLanes()) ||
1936+
isa<VPScalarIVStepsRecipe>(OpDef))
1937+
continue;
1938+
1939+
auto *Unpack = new VPInstruction(VPInstruction::Unpack, {Op});
1940+
if (OpDef->isPhi())
1941+
Unpack->insertBefore(*OpDef->getParent(),
1942+
OpDef->getParent()->getFirstNonPhi());
1943+
else
1944+
Unpack->insertAfter(OpDef);
1945+
Op->replaceUsesWithIf(Unpack, [](VPUser &U, unsigned) {
1946+
auto *RepR = dyn_cast<VPReplicateRecipe>(&U);
1947+
return RepR && (!isa<StoreInst>(RepR->getUnderlyingInstr()) ||
1948+
!vputils::isSingleScalar(RepR->getOperand(1)));
1949+
});
1950+
}
19301951
if (all_of(Def->users(),
19311952
[Def](VPUser *U) { return U->usesScalars(Def); }))
19321953
continue;

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
2929
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer
3030
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x ptr> [[TMP7]], zeroinitializer
3131
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
32-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
3332
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
34-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
3533
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
36-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
3734
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
35+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP10]])
36+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
37+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
3838
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
3939
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
4040
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
@@ -62,8 +62,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
6262
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1
6363
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer
6464
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0
65-
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
6665
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1
66+
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]])
6767
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]])
6868
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[NEXT_GEP7]], i32 0
6969
; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP24]], align 1

llvm/test/Transforms/LoopVectorize/AArch64/struct-return-cost.ll

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@ target triple = "aarch64--linux-gnu"
1515
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_a> = extractvalue ir<%call>, ir<0>
1616
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_b> = extractvalue ir<%call>, ir<1>
1717
;
18-
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
18+
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
19+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
1920
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
2021
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
2122
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
2223
;
23-
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
24+
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
25+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
2426
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
2527
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
2628
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
@@ -66,17 +68,20 @@ exit:
6668
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %extract_a = extractvalue { half, half } %call, 0
6769
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %extract_b = extractvalue { half, half } %call, 1
6870
;
69-
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
71+
; CHECK-COST: Cost of 0 for VF 2: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
72+
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
7073
; CHECK-COST: Cost of 0 for VF 2: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
7174
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
7275
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
7376
;
74-
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
77+
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
78+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
7579
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
7680
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
7781
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
7882
;
79-
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
83+
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
84+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
8085
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
8186
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
8287
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
@@ -123,32 +128,38 @@ exit:
123128
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %extract_a = extractvalue { half, half } %call, 0
124129
; CHECK-COST: LV: Found an estimated cost of 0 for VF 1 For instruction: %extract_b = extractvalue { half, half } %call, 1
125130
;
126-
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
131+
; CHECK-COST: Cost of 0 for VF 2: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
132+
; CHECK-COST: Cost of 26 for VF 2: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
127133
; CHECK-COST: Cost of 0 for VF 2: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
128134
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
129135
; CHECK-COST: Cost of 0 for VF 2: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
130136
;
131-
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
137+
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
138+
; CHECK-COST: Cost of 58 for VF 4: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
132139
; CHECK-COST: Cost of 0 for VF 4: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
133140
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
134141
; CHECK-COST: Cost of 0 for VF 4: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
135142
;
136-
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(ir<%in_val>)
143+
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
144+
; CHECK-COST: Cost of 122 for VF 8: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
137145
; CHECK-COST: Cost of 0 for VF 8: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
138146
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
139147
; CHECK-COST: Cost of 0 for VF 8: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
140148
;
141-
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @foo(ir<%in_val>)
149+
; CHECK-COST: Cost of 0 for VF vscale x 1: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
150+
; CHECK-COST: Cost of Invalid for VF vscale x 1: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
142151
; CHECK-COST: Cost of 0 for VF vscale x 1: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
143152
; CHECK-COST: Cost of 0 for VF vscale x 1: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
144153
; CHECK-COST: Cost of 0 for VF vscale x 1: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
145154
;
146-
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @foo(ir<%in_val>)
155+
; CHECK-COST: Cost of 0 for VF vscale x 2: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
156+
; CHECK-COST: Cost of Invalid for VF vscale x 2: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
147157
; CHECK-COST: Cost of 0 for VF vscale x 2: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
148158
; CHECK-COST: Cost of 0 for VF vscale x 2: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
149159
; CHECK-COST: Cost of 0 for VF vscale x 2: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>
150160
;
151-
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @foo(ir<%in_val>)
161+
; CHECK-COST: Cost of 0 for VF vscale x 4: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%in_val>
162+
; CHECK-COST: Cost of Invalid for VF vscale x 4: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
152163
; CHECK-COST: Cost of 0 for VF vscale x 4: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
153164
; CHECK-COST: Cost of 0 for VF vscale x 4: WIDEN ir<%extract_a> = extractvalue vp<[[PACK]]>, ir<0>
154165
; CHECK-COST: Cost of 0 for VF vscale x 4: WIDEN ir<%extract_b> = extractvalue vp<[[PACK]]>, ir<1>

llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ target triple = "aarch64-unknown-linux-gnu"
2828
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
2929
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
3030
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
31-
; CHECK-NEXT: REPLICATE ir<%call> = call @foo(ir<%load>)
31+
; CHECK-NEXT: EMIT vp<[[UNPACK:%.+]]> = unpack-into-scalars ir<%load>
32+
; CHECK-NEXT: REPLICATE ir<%call> = call @foo(vp<[[UNPACK]]>)
3233
; CHECK-NEXT: EMIT vp<[[PACK:%.+]]> = pack-into-vector ir<%call>
3334
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%a>, vp<[[STEPS]]>
3435
; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%arrayidx>

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -768,15 +768,15 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
768768
; VF2-NEXT: [[TMP22:%.*]] = shufflevector <6 x i32> [[WIDE_VEC1]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
769769
; VF2-NEXT: [[TMP14:%.*]] = mul <2 x i32> [[TMP7]], [[TMP13]]
770770
; VF2-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0
771-
; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
772771
; VF2-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1
772+
; VF2-NEXT: store i32 [[TMP15]], ptr [[TMP8]], align 8
773773
; VF2-NEXT: store i32 [[TMP16]], ptr [[TMP9]], align 8
774774
; VF2-NEXT: [[TMP17:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
775775
; VF2-NEXT: [[TMP18:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
776776
; VF2-NEXT: [[TMP23:%.*]] = mul <2 x i32> [[TMP7]], [[TMP22]]
777777
; VF2-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP23]], i32 0
778-
; VF2-NEXT: store i32 [[TMP24]], ptr [[TMP17]], align 8
779778
; VF2-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP23]], i32 1
779+
; VF2-NEXT: store i32 [[TMP24]], ptr [[TMP17]], align 8
780780
; VF2-NEXT: store i32 [[TMP25]], ptr [[TMP18]], align 8
781781
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
782782
; VF2-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 98
@@ -809,25 +809,25 @@ define void @test_2xi32(ptr noalias %data, ptr noalias %factor) {
809809
; VF4-NEXT: [[TMP44:%.*]] = shufflevector <12 x i32> [[WIDE_VEC1]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
810810
; VF4-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP15]], [[TMP27]]
811811
; VF4-NEXT: [[TMP29:%.*]] = extractelement <4 x i32> [[TMP28]], i32 0
812-
; VF4-NEXT: store i32 [[TMP29]], ptr [[TMP16]], align 8
813812
; VF4-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP28]], i32 1
814-
; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
815813
; VF4-NEXT: [[TMP31:%.*]] = extractelement <4 x i32> [[TMP28]], i32 2
816-
; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
817814
; VF4-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP28]], i32 3
815+
; VF4-NEXT: store i32 [[TMP29]], ptr [[TMP16]], align 8
816+
; VF4-NEXT: store i32 [[TMP30]], ptr [[TMP17]], align 8
817+
; VF4-NEXT: store i32 [[TMP31]], ptr [[TMP18]], align 8
818818
; VF4-NEXT: store i32 [[TMP32]], ptr [[TMP19]], align 8
819819
; VF4-NEXT: [[TMP33:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP0]], i32 1
820820
; VF4-NEXT: [[TMP34:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP1]], i32 1
821821
; VF4-NEXT: [[TMP35:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP2]], i32 1
822822
; VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds { i32, i32, i32 }, ptr [[DATA]], i64 [[TMP3]], i32 1
823823
; VF4-NEXT: [[TMP45:%.*]] = mul <4 x i32> [[TMP15]], [[TMP44]]
824824
; VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0
825-
; VF4-NEXT: store i32 [[TMP46]], ptr [[TMP33]], align 8
826825
; VF4-NEXT: [[TMP47:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1
827-
; VF4-NEXT: store i32 [[TMP47]], ptr [[TMP34]], align 8
828826
; VF4-NEXT: [[TMP48:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2
829-
; VF4-NEXT: store i32 [[TMP48]], ptr [[TMP35]], align 8
830827
; VF4-NEXT: [[TMP49:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3
828+
; VF4-NEXT: store i32 [[TMP46]], ptr [[TMP33]], align 8
829+
; VF4-NEXT: store i32 [[TMP47]], ptr [[TMP34]], align 8
830+
; VF4-NEXT: store i32 [[TMP48]], ptr [[TMP35]], align 8
831831
; VF4-NEXT: store i32 [[TMP49]], ptr [[TMP36]], align 8
832832
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
833833
; VF4-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96

0 commit comments

Comments
 (0)