Skip to content

Commit cb03669

Browse files
committed
[VPlan] Fix packed replication of struct types
I ran into this crash when llvm#158690 caused a loop with a struct call to be vectorized. If we have a replicate recipe in a branch-on-mask predicated region that's used by a widened recipe in another block, then it will be packed together with the other lanes via a VPPredInstPHIRecipe. If we're replicating a call with a struct return type, then we currently crash. The code that handles structs in packScalarIntoVectorizedValue seemed to be untested, at least in test/Transforms/LoopVectorize. There are two places that need to be fixed. The first is that the poison value that the scalar is packed into needs to use toVectorizedTy to correctly handle structs (not to be confused with toVectorTy!). The other is that VPPredInstPHIRecipe expects its operand to be an InsertElementInst when stringing together the different lanes. For structs this will be an InsertValueInst, and the value for the previous lane will be at the back of a chain of InsertValueInsts.
1 parent 70ab120 commit cb03669

File tree

2 files changed

+239
-10
lines changed

2 files changed

+239
-10
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3051,7 +3051,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
30513051
if (State.VF.isVector() && shouldPack()) {
30523052
Value *WideValue =
30533053
State.Lane->isFirstLane()
3054-
? PoisonValue::get(VectorType::get(UI->getType(), State.VF))
3054+
? PoisonValue::get(toVectorizedTy(UI->getType(), State.VF))
30553055
: State.get(this);
30563056
State.set(this, State.packScalarIntoVectorizedValue(this, WideValue,
30573057
*State.Lane));
@@ -3267,11 +3267,21 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
32673267
// also do that packing, thereby "hoisting" the insert-element sequence.
32683268
// Otherwise, a phi node for the scalar value is needed.
32693269
if (State.hasVectorValue(getOperand(0))) {
3270-
Value *VectorValue = State.get(getOperand(0));
3271-
InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
3272-
PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
3273-
VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
3274-
VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
3270+
auto *VecI = cast<Instruction>(State.get(getOperand(0)));
3271+
assert(isa<InsertElementInst>(VecI) || isa<InsertValueInst>(VecI));
3272+
3273+
// If VecI is a struct, it will be a sequence like:
3274+
// %1 = insertvalue %unmodified, %x, 0
3275+
// %2 = insertvalue %1, %y, 1
3276+
// %VecI = insertvalue %2, %z, 2
3277+
// To get the unmodified vector we need to look through the chain.
3278+
if (auto *StructTy = dyn_cast<StructType>(VecI->getType()))
3279+
for (unsigned I = 0; I < StructTy->getNumContainedTypes() - 1; I++)
3280+
VecI = cast<Instruction>(VecI->getOperand(0));
3281+
3282+
PHINode *VPhi = State.Builder.CreatePHI(VecI->getType(), 2);
3283+
VPhi->addIncoming(VecI->getOperand(0), PredicatingBB); // Unmodified vector.
3284+
VPhi->addIncoming(VecI, PredicatedBB); // New vector with inserted element.
32753285
if (State.hasVectorValue(this))
32763286
State.reset(this, VPhi);
32773287
else

llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll

Lines changed: 223 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
166166
; VF4-NEXT: store <4 x float> [[TMP42]], ptr [[TMP45]], align 4
167167
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
168168
; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
169-
; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
169+
; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
170170
; VF4: [[MIDDLE_BLOCK]]:
171171
;
172172
; VF2IC2-LABEL: define void @struct_return_2xf32_replicate(
@@ -233,7 +233,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
233233
; VF2IC2-NEXT: store <2 x float> [[TMP44]], ptr [[TMP50]], align 4
234234
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
235235
; VF2IC2-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
236-
; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
236+
; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
237237
; VF2IC2: [[MIDDLE_BLOCK]]:
238238
;
239239
entry:
@@ -336,7 +336,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
336336
; VF4-NEXT: store <4 x i32> [[TMP63]], ptr [[TMP64]], align 4
337337
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
338338
; VF4-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
339-
; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
339+
; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
340340
; VF4: [[MIDDLE_BLOCK]]:
341341
;
342342
; VF2IC2-LABEL: define void @struct_return_3xi32_replicate(
@@ -425,7 +425,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
425425
; VF2IC2-NEXT: store <2 x i32> [[TMP68]], ptr [[TMP71]], align 4
426426
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
427427
; VF2IC2-NEXT: [[TMP72:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
428-
; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
428+
; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
429429
; VF2IC2: [[MIDDLE_BLOCK]]:
430430
;
431431
entry:
@@ -453,6 +453,224 @@ exit:
453453
ret void
454454
}
455455

456+
define void @struct_return_2xf32_replicate_predicated(ptr %a) {
457+
; CHECK-LABEL: define void @scalarized_predicated_struct_return
458+
; CHECK: vector.body:
459+
; CHECK: [[WIDE_CALL:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
460+
; VF4-LABEL: define void @struct_return_2xf32_replicate_predicated(
461+
; VF4-SAME: ptr [[A:%.*]]) {
462+
; VF4-NEXT: [[ENTRY:.*:]]
463+
; VF4-NEXT: br label %[[VECTOR_PH:.*]]
464+
; VF4: [[VECTOR_PH]]:
465+
; VF4-NEXT: br label %[[VECTOR_BODY:.*]]
466+
; VF4: [[VECTOR_BODY]]:
467+
; VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ]
468+
; VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
469+
; VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 8
470+
; VF4-NEXT: [[TMP1:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
471+
; VF4-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
472+
; VF4-NEXT: br i1 [[TMP2]], label %[[PRED_CALL_IF:.*]], label %[[PRED_CALL_CONTINUE:.*]]
473+
; VF4: [[PRED_CALL_IF]]:
474+
; VF4-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
475+
; VF4-NEXT: [[TMP4:%.*]] = tail call { float, float } @fn2(float [[TMP3]]) #[[ATTR3:[0-9]+]]
476+
; VF4-NEXT: [[TMP5:%.*]] = extractvalue { float, float } [[TMP4]], 0
477+
; VF4-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
478+
; VF4-NEXT: [[TMP7:%.*]] = insertvalue { <4 x float>, <4 x float> } poison, <4 x float> [[TMP6]], 0
479+
; VF4-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP4]], 1
480+
; VF4-NEXT: [[TMP9:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP7]], 1
481+
; VF4-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP8]], i32 0
482+
; VF4-NEXT: [[TMP11:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP7]], <4 x float> [[TMP10]], 1
483+
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE]]
484+
; VF4: [[PRED_CALL_CONTINUE]]:
485+
; VF4-NEXT: [[TMP12:%.*]] = phi { <4 x float>, <4 x float> } [ poison, %[[VECTOR_BODY]] ], [ [[TMP7]], %[[PRED_CALL_IF]] ]
486+
; VF4-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
487+
; VF4-NEXT: br i1 [[TMP13]], label %[[PRED_CALL_IF1:.*]], label %[[PRED_CALL_CONTINUE2:.*]]
488+
; VF4: [[PRED_CALL_IF1]]:
489+
; VF4-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
490+
; VF4-NEXT: [[TMP15:%.*]] = tail call { float, float } @fn2(float [[TMP14]]) #[[ATTR3]]
491+
; VF4-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP15]], 0
492+
; VF4-NEXT: [[TMP17:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP12]], 0
493+
; VF4-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP17]], float [[TMP16]], i32 1
494+
; VF4-NEXT: [[TMP19:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP12]], <4 x float> [[TMP18]], 0
495+
; VF4-NEXT: [[TMP20:%.*]] = extractvalue { float, float } [[TMP15]], 1
496+
; VF4-NEXT: [[TMP21:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP19]], 1
497+
; VF4-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP20]], i32 1
498+
; VF4-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP19]], <4 x float> [[TMP22]], 1
499+
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE2]]
500+
; VF4: [[PRED_CALL_CONTINUE2]]:
501+
; VF4-NEXT: [[TMP24:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP12]], %[[PRED_CALL_CONTINUE]] ], [ [[TMP19]], %[[PRED_CALL_IF1]] ]
502+
; VF4-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
503+
; VF4-NEXT: br i1 [[TMP25]], label %[[PRED_CALL_IF3:.*]], label %[[PRED_CALL_CONTINUE4:.*]]
504+
; VF4: [[PRED_CALL_IF3]]:
505+
; VF4-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
506+
; VF4-NEXT: [[TMP27:%.*]] = tail call { float, float } @fn2(float [[TMP26]]) #[[ATTR3]]
507+
; VF4-NEXT: [[TMP28:%.*]] = extractvalue { float, float } [[TMP27]], 0
508+
; VF4-NEXT: [[TMP29:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP24]], 0
509+
; VF4-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP28]], i32 2
510+
; VF4-NEXT: [[TMP31:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP24]], <4 x float> [[TMP30]], 0
511+
; VF4-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP27]], 1
512+
; VF4-NEXT: [[TMP33:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP31]], 1
513+
; VF4-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP32]], i32 2
514+
; VF4-NEXT: [[TMP35:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP31]], <4 x float> [[TMP34]], 1
515+
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE4]]
516+
; VF4: [[PRED_CALL_CONTINUE4]]:
517+
; VF4-NEXT: [[TMP36:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP24]], %[[PRED_CALL_CONTINUE2]] ], [ [[TMP31]], %[[PRED_CALL_IF3]] ]
518+
; VF4-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
519+
; VF4-NEXT: br i1 [[TMP37]], label %[[PRED_CALL_IF5:.*]], label %[[PRED_CALL_CONTINUE6:.*]]
520+
; VF4: [[PRED_CALL_IF5]]:
521+
; VF4-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
522+
; VF4-NEXT: [[TMP39:%.*]] = tail call { float, float } @fn2(float [[TMP38]]) #[[ATTR3]]
523+
; VF4-NEXT: [[TMP40:%.*]] = extractvalue { float, float } [[TMP39]], 0
524+
; VF4-NEXT: [[TMP41:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP36]], 0
525+
; VF4-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP40]], i32 3
526+
; VF4-NEXT: [[TMP43:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP36]], <4 x float> [[TMP42]], 0
527+
; VF4-NEXT: [[TMP44:%.*]] = extractvalue { float, float } [[TMP39]], 1
528+
; VF4-NEXT: [[TMP45:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP43]], 1
529+
; VF4-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP44]], i32 3
530+
; VF4-NEXT: [[TMP47:%.*]] = insertvalue { <4 x float>, <4 x float> } [[TMP43]], <4 x float> [[TMP46]], 1
531+
; VF4-NEXT: br label %[[PRED_CALL_CONTINUE6]]
532+
; VF4: [[PRED_CALL_CONTINUE6]]:
533+
; VF4-NEXT: [[TMP48:%.*]] = phi { <4 x float>, <4 x float> } [ [[TMP36]], %[[PRED_CALL_CONTINUE4]] ], [ [[TMP43]], %[[PRED_CALL_IF5]] ]
534+
; VF4-NEXT: [[TMP49:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP48]], 0
535+
; VF4-NEXT: [[TMP50:%.*]] = fdiv <4 x float> [[TMP49]], [[WIDE_LOAD]]
536+
; VF4-NEXT: [[TMP51:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
537+
; VF4-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
538+
; VF4: [[PRED_STORE_IF]]:
539+
; VF4-NEXT: [[TMP52:%.*]] = add i64 [[INDEX]], 0
540+
; VF4-NEXT: [[TMP53:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP52]]
541+
; VF4-NEXT: [[TMP54:%.*]] = extractelement <4 x float> [[TMP50]], i32 0
542+
; VF4-NEXT: store float [[TMP54]], ptr [[TMP53]], align 8
543+
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE]]
544+
; VF4: [[PRED_STORE_CONTINUE]]:
545+
; VF4-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
546+
; VF4-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
547+
; VF4: [[PRED_STORE_IF7]]:
548+
; VF4-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 1
549+
; VF4-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP56]]
550+
; VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x float> [[TMP50]], i32 1
551+
; VF4-NEXT: store float [[TMP58]], ptr [[TMP57]], align 8
552+
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE8]]
553+
; VF4: [[PRED_STORE_CONTINUE8]]:
554+
; VF4-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
555+
; VF4-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
556+
; VF4: [[PRED_STORE_IF9]]:
557+
; VF4-NEXT: [[TMP60:%.*]] = add i64 [[INDEX]], 2
558+
; VF4-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP60]]
559+
; VF4-NEXT: [[TMP62:%.*]] = extractelement <4 x float> [[TMP50]], i32 2
560+
; VF4-NEXT: store float [[TMP62]], ptr [[TMP61]], align 8
561+
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE10]]
562+
; VF4: [[PRED_STORE_CONTINUE10]]:
563+
; VF4-NEXT: [[TMP63:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
564+
; VF4-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12]]
565+
; VF4: [[PRED_STORE_IF11]]:
566+
; VF4-NEXT: [[TMP64:%.*]] = add i64 [[INDEX]], 3
567+
; VF4-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP64]]
568+
; VF4-NEXT: [[TMP66:%.*]] = extractelement <4 x float> [[TMP50]], i32 3
569+
; VF4-NEXT: store float [[TMP66]], ptr [[TMP65]], align 8
570+
; VF4-NEXT: br label %[[PRED_STORE_CONTINUE12]]
571+
; VF4: [[PRED_STORE_CONTINUE12]]:
572+
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
573+
; VF4-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
574+
; VF4-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
575+
; VF4: [[MIDDLE_BLOCK]]:
576+
;
577+
; VF2IC2-LABEL: define void @struct_return_2xf32_replicate_predicated(
578+
; VF2IC2-SAME: ptr [[A:%.*]]) {
579+
; VF2IC2-NEXT: [[ENTRY:.*:]]
580+
; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]]
581+
; VF2IC2: [[VECTOR_PH]]:
582+
; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]]
583+
; VF2IC2: [[VECTOR_BODY]]:
584+
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
585+
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
586+
; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2
587+
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8
588+
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
589+
; VF2IC2-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer
590+
; VF2IC2-NEXT: [[TMP3:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD1]], zeroinitializer
591+
; VF2IC2-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
592+
; VF2IC2-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
593+
; VF2IC2: [[PRED_STORE_IF]]:
594+
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
595+
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
596+
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
597+
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
598+
; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
599+
; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
600+
; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
601+
; VF2IC2-NEXT: store float [[TMP11]], ptr [[TMP9]], align 8
602+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE]]
603+
; VF2IC2: [[PRED_STORE_CONTINUE]]:
604+
; VF2IC2-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
605+
; VF2IC2-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
606+
; VF2IC2: [[PRED_STORE_IF2]]:
607+
; VF2IC2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
608+
; VF2IC2-NEXT: [[TMP14:%.*]] = tail call { float, float } @fn2(float [[TMP13]]) #[[ATTR3]]
609+
; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1
610+
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
611+
; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
612+
; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
613+
; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
614+
; VF2IC2-NEXT: store float [[TMP19]], ptr [[TMP17]], align 8
615+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE3]]
616+
; VF2IC2: [[PRED_STORE_CONTINUE3]]:
617+
; VF2IC2-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
618+
; VF2IC2-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
619+
; VF2IC2: [[PRED_STORE_IF4]]:
620+
; VF2IC2-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
621+
; VF2IC2-NEXT: [[TMP22:%.*]] = tail call { float, float } @fn2(float [[TMP21]]) #[[ATTR3]]
622+
; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2
623+
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
624+
; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
625+
; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
626+
; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
627+
; VF2IC2-NEXT: store float [[TMP27]], ptr [[TMP25]], align 8
628+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE5]]
629+
; VF2IC2: [[PRED_STORE_CONTINUE5]]:
630+
; VF2IC2-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
631+
; VF2IC2-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
632+
; VF2IC2: [[PRED_STORE_IF6]]:
633+
; VF2IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
634+
; VF2IC2-NEXT: [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]]
635+
; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3
636+
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
637+
; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
638+
; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
639+
; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]
640+
; VF2IC2-NEXT: store float [[TMP35]], ptr [[TMP33]], align 8
641+
; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE7]]
642+
; VF2IC2: [[PRED_STORE_CONTINUE7]]:
643+
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
644+
; VF2IC2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
645+
; VF2IC2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
646+
; VF2IC2: [[MIDDLE_BLOCK]]:
647+
;
648+
entry:
649+
br label %for.body
650+
651+
for.body:
652+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
653+
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
654+
%in_val = load float, ptr %arrayidx, align 8
655+
%sgt_zero = fcmp ogt float %in_val, 0.0
656+
br i1 %sgt_zero, label %if.then, label %for.inc
657+
658+
if.then:
659+
%call = tail call { float, float } @fn2(float %in_val) #3
660+
%extract_a = extractvalue { float, float } %call, 0
661+
%div = fdiv float %extract_a, %in_val
662+
store float %div, ptr %arrayidx, align 8
663+
br label %for.inc
664+
665+
for.inc:
666+
%iv.next = add nuw nsw i64 %iv, 1
667+
%exitcond.not = icmp eq i64 %iv.next, 1024
668+
br i1 %exitcond.not, label %exit, label %for.body
669+
670+
exit:
671+
ret void
672+
}
673+
456674
declare { i64 } @fn1(float)
457675
declare { float, float } @fn2(float)
458676
declare { i32, i32, i32 } @fn3(i32)
@@ -464,3 +682,4 @@ declare { <8 x i32>, <8 x i32>, <8 x i32> } @fixed_vec_fn3(<8 x i32>)
464682
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn1(fixed_vec_fn1)" }
465683
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn2(fixed_vec_fn2)" }
466684
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN8v_fn3(fixed_vec_fn3)" }
685+
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVnM8v_fn2(fixed_vec_fn2)" }

0 commit comments

Comments
 (0)