Skip to content

Commit 39605b4

Browse files
committed
!fixup address comments, thanks
1 parent dc2df3a commit 39605b4

File tree

5 files changed

+88
-92
lines changed

5 files changed

+88
-92
lines changed

llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,12 @@ m_ExtractLastElement(const Op0_t &Op0) {
368368
return m_VPInstruction<VPInstruction::ExtractLastElement>(Op0);
369369
}
370370

371+
template <typename Op0_t, typename Op1_t>
372+
inline VPInstruction_match<Instruction::ExtractElement, Op0_t, Op1_t>
373+
m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1) {
374+
return m_VPInstruction<Instruction::ExtractElement>(Op0, Op1);
375+
}
376+
371377
template <typename Op0_t, typename Op1_t, typename Op2_t>
372378
inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t>
373379
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1224,12 +1224,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
12241224
return;
12251225
}
12261226

1227-
VPValue *Idx;
1228-
if (match(&R, m_VPInstruction<Instruction::ExtractElement>(m_BuildVector(),
1229-
m_VPValue(Idx)))) {
1227+
uint64_t Idx;
1228+
if (match(&R, m_ExtractElement(m_BuildVector(), m_ConstantInt(Idx)))) {
12301229
auto *BuildVector = cast<VPInstruction>(R.getOperand(0));
1231-
Def->replaceAllUsesWith(BuildVector->getOperand(
1232-
cast<ConstantInt>(Idx->getLiveInIRValue())->getZExtValue()));
1230+
Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
12331231
return;
12341232
}
12351233

@@ -3806,29 +3804,23 @@ void VPlanTransforms::materializeBuildAndUnpackVectors(VPlan &Plan) {
38063804
}
38073805

38083806
// Create explicit VPInstructions to convert vectors to scalars.
3809-
for (VPBasicBlock *VPBB :
3810-
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
3807+
for (VPBasicBlock *VPBB : VPBBsInsideLoopRegion) {
38113808
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
38123809
if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe>(&R))
38133810
continue;
38143811
for (VPValue *Def : R.definedValues()) {
38153812
if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def))
38163813
continue;
38173814

3818-
if (VPBB->getParent() != Plan.getVectorLoopRegion())
3819-
continue;
3820-
3821-
auto UsesVectorOrInsideReplicateRegion = [LoopRegion](VPUser *U) {
3815+
auto IsInsideReplicateRegion = [LoopRegion](VPUser *U) {
38223816
VPRegionBlock *ParentRegion =
38233817
cast<VPRecipeBase>(U)->getParent()->getParent();
38243818
return ParentRegion && ParentRegion != LoopRegion;
38253819
};
38263820

3827-
if (none_of(Def->users(),
3828-
[Def, &UsesVectorOrInsideReplicateRegion](VPUser *U) {
3829-
return !UsesVectorOrInsideReplicateRegion(U) &&
3830-
U->usesScalars(Def);
3831-
}))
3821+
if (none_of(Def->users(), [Def, &IsInsideReplicateRegion](VPUser *U) {
3822+
return !IsInsideReplicateRegion(U) && U->usesScalars(Def);
3823+
}))
38323824
continue;
38333825

38343826
auto *UnpackVector =
@@ -3838,10 +3830,8 @@ void VPlanTransforms::materializeBuildAndUnpackVectors(VPlan &Plan) {
38383830
else
38393831
UnpackVector->insertAfter(&R);
38403832
Def->replaceUsesWithIf(
3841-
UnpackVector,
3842-
[Def, &UsesVectorOrInsideReplicateRegion](VPUser &U, unsigned) {
3843-
return !UsesVectorOrInsideReplicateRegion(&U) &&
3844-
U.usesScalars(Def);
3833+
UnpackVector, [Def, &IsInsideReplicateRegion](VPUser &U, unsigned) {
3834+
return !IsInsideReplicateRegion(&U) && U.usesScalars(Def);
38453835
});
38463836
}
38473837
}

llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll

Lines changed: 54 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,25 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
4545
; I64-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 14
4646
; I64-NEXT: [[TMP19:%.*]] = add i32 [[INDEX]], 15
4747
; I64-NEXT: [[TMP20:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x double>
48+
; I64-NEXT: [[CONV:%.*]] = extractelement <4 x double> [[TMP20]], i32 0
49+
; I64-NEXT: [[TMP57:%.*]] = extractelement <4 x double> [[TMP20]], i32 1
50+
; I64-NEXT: [[TMP58:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
51+
; I64-NEXT: [[TMP59:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
4852
; I64-NEXT: [[TMP21:%.*]] = uitofp <4 x i32> [[STEP_ADD]] to <4 x double>
53+
; I64-NEXT: [[TMP60:%.*]] = extractelement <4 x double> [[TMP21]], i32 0
54+
; I64-NEXT: [[TMP61:%.*]] = extractelement <4 x double> [[TMP21]], i32 1
55+
; I64-NEXT: [[TMP62:%.*]] = extractelement <4 x double> [[TMP21]], i32 2
56+
; I64-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP21]], i32 3
4957
; I64-NEXT: [[TMP22:%.*]] = uitofp <4 x i32> [[STEP_ADD_2]] to <4 x double>
58+
; I64-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP22]], i32 0
59+
; I64-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP22]], i32 1
60+
; I64-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP22]], i32 2
61+
; I64-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP22]], i32 3
5062
; I64-NEXT: [[TMP23:%.*]] = uitofp <4 x i32> [[STEP_ADD_3]] to <4 x double>
63+
; I64-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP23]], i32 0
64+
; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
65+
; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
66+
; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
5167
; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
5268
; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
5369
; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
@@ -80,37 +96,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
8096
; I64-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP37]], align 4
8197
; I64-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP38]], align 4
8298
; I64-NEXT: [[TMP55:%.*]] = load ptr, ptr [[TMP39]], align 4
83-
; I64-NEXT: [[CONV:%.*]] = extractelement <4 x double> [[TMP20]], i32 0
8499
; I64-NEXT: store double [[CONV]], ptr [[TMP0]], align 4
85-
; I64-NEXT: [[TMP57:%.*]] = extractelement <4 x double> [[TMP20]], i32 1
86100
; I64-NEXT: store double [[TMP57]], ptr [[TMP41]], align 4
87-
; I64-NEXT: [[TMP58:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
88101
; I64-NEXT: store double [[TMP58]], ptr [[TMP42]], align 4
89-
; I64-NEXT: [[TMP59:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
90102
; I64-NEXT: store double [[TMP59]], ptr [[TMP43]], align 4
91-
; I64-NEXT: [[TMP60:%.*]] = extractelement <4 x double> [[TMP21]], i32 0
92103
; I64-NEXT: store double [[TMP60]], ptr [[TMP44]], align 4
93-
; I64-NEXT: [[TMP61:%.*]] = extractelement <4 x double> [[TMP21]], i32 1
94104
; I64-NEXT: store double [[TMP61]], ptr [[TMP45]], align 4
95-
; I64-NEXT: [[TMP62:%.*]] = extractelement <4 x double> [[TMP21]], i32 2
96105
; I64-NEXT: store double [[TMP62]], ptr [[TMP46]], align 4
97-
; I64-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP21]], i32 3
98106
; I64-NEXT: store double [[TMP63]], ptr [[TMP47]], align 4
99-
; I64-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP22]], i32 0
100107
; I64-NEXT: store double [[TMP64]], ptr [[TMP48]], align 4
101-
; I64-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP22]], i32 1
102108
; I64-NEXT: store double [[TMP65]], ptr [[TMP49]], align 4
103-
; I64-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP22]], i32 2
104109
; I64-NEXT: store double [[TMP66]], ptr [[TMP50]], align 4
105-
; I64-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP22]], i32 3
106110
; I64-NEXT: store double [[TMP67]], ptr [[TMP51]], align 4
107-
; I64-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP23]], i32 0
108111
; I64-NEXT: store double [[TMP68]], ptr [[TMP52]], align 4
109-
; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
110112
; I64-NEXT: store double [[TMP69]], ptr [[TMP53]], align 4
111-
; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
112113
; I64-NEXT: store double [[TMP70]], ptr [[TMP54]], align 4
113-
; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
114114
; I64-NEXT: store double [[TMP71]], ptr [[TMP55]], align 4
115115
; I64-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
116116
; I64-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
@@ -139,21 +139,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
139139
; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2
140140
; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3
141141
; I64-NEXT: [[TMP79:%.*]] = uitofp <4 x i32> [[VEC_IND5]] to <4 x double>
142-
; I64-NEXT: [[TMP80:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
143-
; I64-NEXT: [[TMP81:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
144-
; I64-NEXT: [[TMP82:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
145-
; I64-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
146-
; I64-NEXT: [[TMP84:%.*]] = load ptr, ptr [[TMP80]], align 4
147-
; I64-NEXT: [[TMP85:%.*]] = load ptr, ptr [[TMP81]], align 4
148-
; I64-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP82]], align 4
149-
; I64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[TMP83]], align 4
150142
; I64-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP79]], i32 0
151-
; I64-NEXT: store double [[TMP88]], ptr [[TMP84]], align 4
152143
; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1
153-
; I64-NEXT: store double [[TMP89]], ptr [[TMP85]], align 4
154144
; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2
155-
; I64-NEXT: store double [[TMP90]], ptr [[TMP86]], align 4
156145
; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3
146+
; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
147+
; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
148+
; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
149+
; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
150+
; I64-NEXT: [[TMP94:%.*]] = load ptr, ptr [[TMP84]], align 4
151+
; I64-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP85]], align 4
152+
; I64-NEXT: [[TMP96:%.*]] = load ptr, ptr [[TMP86]], align 4
153+
; I64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[TMP93]], align 4
154+
; I64-NEXT: store double [[TMP88]], ptr [[TMP94]], align 4
155+
; I64-NEXT: store double [[TMP89]], ptr [[TMP95]], align 4
156+
; I64-NEXT: store double [[TMP90]], ptr [[TMP96]], align 4
157157
; I64-NEXT: store double [[TMP91]], ptr [[TMP87]], align 4
158158
; I64-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
159159
; I64-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND5]], splat (i32 4)
@@ -201,9 +201,25 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
201201
; I32-NEXT: [[TMP42:%.*]] = add i32 [[INDEX]], 14
202202
; I32-NEXT: [[TMP43:%.*]] = add i32 [[INDEX]], 15
203203
; I32-NEXT: [[TMP44:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x double>
204+
; I32-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[TMP44]], i32 0
205+
; I32-NEXT: [[TMP32:%.*]] = extractelement <4 x double> [[TMP44]], i32 1
206+
; I32-NEXT: [[TMP33:%.*]] = extractelement <4 x double> [[TMP44]], i32 2
207+
; I32-NEXT: [[TMP34:%.*]] = extractelement <4 x double> [[TMP44]], i32 3
204208
; I32-NEXT: [[TMP45:%.*]] = uitofp <4 x i32> [[STEP_ADD]] to <4 x double>
209+
; I32-NEXT: [[TMP35:%.*]] = extractelement <4 x double> [[TMP45]], i32 0
210+
; I32-NEXT: [[TMP36:%.*]] = extractelement <4 x double> [[TMP45]], i32 1
211+
; I32-NEXT: [[TMP37:%.*]] = extractelement <4 x double> [[TMP45]], i32 2
212+
; I32-NEXT: [[TMP38:%.*]] = extractelement <4 x double> [[TMP45]], i32 3
205213
; I32-NEXT: [[TMP46:%.*]] = uitofp <4 x i32> [[STEP_ADD_2]] to <4 x double>
214+
; I32-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP46]], i32 0
215+
; I32-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP46]], i32 1
216+
; I32-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP46]], i32 2
217+
; I32-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP46]], i32 3
206218
; I32-NEXT: [[TMP55:%.*]] = uitofp <4 x i32> [[STEP_ADD_3]] to <4 x double>
219+
; I32-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP55]], i32 0
220+
; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
221+
; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
222+
; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
207223
; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
208224
; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
209225
; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
@@ -236,37 +252,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
236252
; I32-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP61]], align 4
237253
; I32-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP62]], align 4
238254
; I32-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP71]], align 4
239-
; I32-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[TMP44]], i32 0
240255
; I32-NEXT: store double [[TMP31]], ptr [[TMP23]], align 4
241-
; I32-NEXT: [[TMP32:%.*]] = extractelement <4 x double> [[TMP44]], i32 1
242256
; I32-NEXT: store double [[TMP32]], ptr [[TMP24]], align 4
243-
; I32-NEXT: [[TMP33:%.*]] = extractelement <4 x double> [[TMP44]], i32 2
244257
; I32-NEXT: store double [[TMP33]], ptr [[TMP25]], align 4
245-
; I32-NEXT: [[TMP34:%.*]] = extractelement <4 x double> [[TMP44]], i32 3
246258
; I32-NEXT: store double [[TMP34]], ptr [[TMP26]], align 4
247-
; I32-NEXT: [[TMP35:%.*]] = extractelement <4 x double> [[TMP45]], i32 0
248259
; I32-NEXT: store double [[TMP35]], ptr [[TMP27]], align 4
249-
; I32-NEXT: [[TMP36:%.*]] = extractelement <4 x double> [[TMP45]], i32 1
250260
; I32-NEXT: store double [[TMP36]], ptr [[TMP28]], align 4
251-
; I32-NEXT: [[TMP37:%.*]] = extractelement <4 x double> [[TMP45]], i32 2
252261
; I32-NEXT: store double [[TMP37]], ptr [[TMP29]], align 4
253-
; I32-NEXT: [[TMP38:%.*]] = extractelement <4 x double> [[TMP45]], i32 3
254262
; I32-NEXT: store double [[TMP38]], ptr [[TMP30]], align 4
255-
; I32-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP46]], i32 0
256263
; I32-NEXT: store double [[TMP63]], ptr [[TMP47]], align 4
257-
; I32-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP46]], i32 1
258264
; I32-NEXT: store double [[TMP64]], ptr [[TMP48]], align 4
259-
; I32-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP46]], i32 2
260265
; I32-NEXT: store double [[TMP65]], ptr [[TMP49]], align 4
261-
; I32-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP46]], i32 3
262266
; I32-NEXT: store double [[TMP66]], ptr [[TMP50]], align 4
263-
; I32-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP55]], i32 0
264267
; I32-NEXT: store double [[TMP67]], ptr [[TMP51]], align 4
265-
; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
266268
; I32-NEXT: store double [[TMP68]], ptr [[TMP52]], align 4
267-
; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
268269
; I32-NEXT: store double [[TMP69]], ptr [[TMP53]], align 4
269-
; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
270270
; I32-NEXT: store double [[TMP70]], ptr [[TMP54]], align 4
271271
; I32-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
272272
; I32-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
@@ -295,21 +295,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
295295
; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2
296296
; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3
297297
; I32-NEXT: [[TMP78:%.*]] = uitofp <4 x i32> [[VEC_IND5]] to <4 x double>
298-
; I32-NEXT: [[TMP79:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
299-
; I32-NEXT: [[TMP80:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
300-
; I32-NEXT: [[TMP81:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
301-
; I32-NEXT: [[TMP82:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
302-
; I32-NEXT: [[TMP83:%.*]] = load ptr, ptr [[TMP79]], align 4
303-
; I32-NEXT: [[TMP84:%.*]] = load ptr, ptr [[TMP80]], align 4
304-
; I32-NEXT: [[TMP85:%.*]] = load ptr, ptr [[TMP81]], align 4
305-
; I32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP82]], align 4
306298
; I32-NEXT: [[TMP87:%.*]] = extractelement <4 x double> [[TMP78]], i32 0
307-
; I32-NEXT: store double [[TMP87]], ptr [[TMP83]], align 4
308299
; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1
309-
; I32-NEXT: store double [[TMP88]], ptr [[TMP84]], align 4
310300
; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2
311-
; I32-NEXT: store double [[TMP89]], ptr [[TMP85]], align 4
312301
; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3
302+
; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
303+
; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
304+
; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
305+
; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
306+
; I32-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP83]], align 4
307+
; I32-NEXT: [[TMP94:%.*]] = load ptr, ptr [[TMP84]], align 4
308+
; I32-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP85]], align 4
309+
; I32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP92]], align 4
310+
; I32-NEXT: store double [[TMP87]], ptr [[TMP93]], align 4
311+
; I32-NEXT: store double [[TMP88]], ptr [[TMP94]], align 4
312+
; I32-NEXT: store double [[TMP89]], ptr [[TMP95]], align 4
313313
; I32-NEXT: store double [[TMP90]], ptr [[TMP86]], align 4
314314
; I32-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
315315
; I32-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND5]], splat (i32 4)

llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ define void @widen_ptr_induction_dbg(ptr %start, ptr %end) {
131131
; DEBUGLOC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
132132
; DEBUGLOC-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ], !dbg [[DBG35:![0-9]+]]
133133
; DEBUGLOC-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 8, i64 16, i64 24>, !dbg [[DBG35]]
134-
; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0, !dbg [[DBG36:![0-9]+]]
135-
; DEBUGLOC-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 1, !dbg [[DBG36]]
134+
; DEBUGLOC-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
135+
; DEBUGLOC-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 1, !dbg [[DBG36:![0-9]+]]
136136
; DEBUGLOC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
137137
; DEBUGLOC-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32, !dbg [[DBG35]]
138138
; DEBUGLOC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg [[DBG37:![0-9]+]]

0 commit comments

Comments
 (0)