Skip to content

Commit b983c4f

Browse files
committed
[VPlan] Remove ILV::sinkScalarOperands.
Remove legacy ILV sinkScalarOperands, which is superseded by the sinkScalarOperands VPlan transforms. There are a few cases that aren't handled by VPlan's sinkScalarOperands, namely when we have scalar-steps where the first lane is used outside a replicate region; the IR-based sinking is able to sink the computation of the steps for all lanes except the first. We could improve this further by duplicating such scalar-steps recipes, one to serve the first-lane-only users and one to serve users in replicate regions, but I am not sure if the extra complexity is warranted.
1 parent c7fae59 commit b983c4f

22 files changed

+273
-358
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -541,10 +541,6 @@ class InnerLoopVectorizer {
541541
protected:
542542
friend class LoopVectorizationPlanner;
543543

544-
/// Iteratively sink the scalarized operands of a predicated instruction into
545-
/// the block that was created for it.
546-
void sinkScalarOperands(Instruction *PredInst);
547-
548544
/// Returns (and creates if needed) the trip count of the widened loop.
549545
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
550546

@@ -629,9 +625,6 @@ class InnerLoopVectorizer {
629625
/// A list of all bypass blocks. The first block is the entry of the loop.
630626
SmallVector<BasicBlock *, 4> LoopBypassBlocks;
631627

632-
/// Store instructions that were predicated.
633-
SmallVector<Instruction *, 4> PredicatedInstructions;
634-
635628
/// Trip count of the original loop.
636629
Value *TripCount = nullptr;
637630

@@ -2385,15 +2378,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23852378

23862379
// End if-block.
23872380
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
2388-
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
23892381
assert(
23902382
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
23912383
all_of(RepRecipe->operands(),
23922384
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
23932385
"Expected a recipe is either within a region or all of its operands "
23942386
"are defined outside the vectorized region.");
2395-
if (IfPredicateInstr)
2396-
PredicatedInstructions.push_back(Cloned);
23972387
}
23982388

23992389
Value *
@@ -2867,8 +2857,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28672857
if (!State.Plan->getVectorLoopRegion())
28682858
return;
28692859

2870-
for (Instruction *PI : PredicatedInstructions)
2871-
sinkScalarOperands(&*PI);
28722860

28732861
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
28742862
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
@@ -2895,82 +2883,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28952883
VF.getKnownMinValue() * UF);
28962884
}
28972885

2898-
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
2899-
// The basic block and loop containing the predicated instruction.
2900-
auto *PredBB = PredInst->getParent();
2901-
auto *VectorLoop = LI->getLoopFor(PredBB);
2902-
2903-
// Initialize a worklist with the operands of the predicated instruction.
2904-
SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
2905-
2906-
// Holds instructions that we need to analyze again. An instruction may be
2907-
// reanalyzed if we don't yet know if we can sink it or not.
2908-
SmallVector<Instruction *, 8> InstsToReanalyze;
2909-
2910-
// Returns true if a given use occurs in the predicated block. Phi nodes use
2911-
// their operands in their corresponding predecessor blocks.
2912-
auto IsBlockOfUsePredicated = [&](Use &U) -> bool {
2913-
auto *I = cast<Instruction>(U.getUser());
2914-
BasicBlock *BB = I->getParent();
2915-
if (auto *Phi = dyn_cast<PHINode>(I))
2916-
BB = Phi->getIncomingBlock(
2917-
PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
2918-
return BB == PredBB;
2919-
};
2920-
2921-
// Iteratively sink the scalarized operands of the predicated instruction
2922-
// into the block we created for it. When an instruction is sunk, it's
2923-
// operands are then added to the worklist. The algorithm ends after one pass
2924-
// through the worklist doesn't sink a single instruction.
2925-
bool Changed;
2926-
do {
2927-
// Add the instructions that need to be reanalyzed to the worklist, and
2928-
// reset the changed indicator.
2929-
Worklist.insert_range(InstsToReanalyze);
2930-
InstsToReanalyze.clear();
2931-
Changed = false;
2932-
2933-
while (!Worklist.empty()) {
2934-
auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
2935-
2936-
// We can't sink an instruction if it is a phi node, is not in the loop,
2937-
// may have side effects or may read from memory.
2938-
// TODO: Could do more granular checking to allow sinking
2939-
// a load past non-store instructions.
2940-
if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
2941-
I->mayHaveSideEffects() || I->mayReadFromMemory())
2942-
continue;
2943-
2944-
// If the instruction is already in PredBB, check if we can sink its
2945-
// operands. In that case, VPlan's sinkScalarOperands() succeeded in
2946-
// sinking the scalar instruction I, hence it appears in PredBB; but it
2947-
// may have failed to sink I's operands (recursively), which we try
2948-
// (again) here.
2949-
if (I->getParent() == PredBB) {
2950-
Worklist.insert_range(I->operands());
2951-
continue;
2952-
}
2953-
2954-
// It's legal to sink the instruction if all its uses occur in the
2955-
// predicated block. Otherwise, there's nothing to do yet, and we may
2956-
// need to reanalyze the instruction.
2957-
if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) {
2958-
InstsToReanalyze.push_back(I);
2959-
continue;
2960-
}
2961-
2962-
// Move the instruction to the beginning of the predicated block, and add
2963-
// it's operands to the worklist.
2964-
I->moveBefore(PredBB->getFirstInsertionPt());
2965-
Worklist.insert_range(I->operands());
2966-
2967-
// The sinking may have enabled other instructions to be sunk, so we will
2968-
// need to iterate.
2969-
Changed = true;
2970-
}
2971-
} while (Changed);
2972-
}
2973-
29742886
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
29752887
auto Iter = vp_depth_first_deep(Plan.getEntry());
29762888
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {

llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,21 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
213213
; CHECK: [[VECTOR_BODY]]:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ]
215215
; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0
216+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
217+
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
218+
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
219+
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
220+
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
221+
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
222+
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
223+
; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
224+
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
225+
; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
226+
; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
227+
; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
228+
; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
229+
; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
230+
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
216231
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]]
217232
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0
218233
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
@@ -232,7 +247,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
232247
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1
233248
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
234249
; CHECK: [[PRED_STORE_IF1]]:
235-
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1
236250
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]]
237251
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1
238252
; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
@@ -241,7 +255,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
241255
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2
242256
; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
243257
; CHECK: [[PRED_STORE_IF3]]:
244-
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2
245258
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]]
246259
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2
247260
; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1
@@ -250,7 +263,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
250263
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3
251264
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
252265
; CHECK: [[PRED_STORE_IF5]]:
253-
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3
254266
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]]
255267
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3
256268
; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1
@@ -259,7 +271,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
259271
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4
260272
; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
261273
; CHECK: [[PRED_STORE_IF7]]:
262-
; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4
263274
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]]
264275
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4
265276
; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1
@@ -268,7 +279,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
268279
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5
269280
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
270281
; CHECK: [[PRED_STORE_IF9]]:
271-
; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5
272282
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]]
273283
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5
274284
; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1
@@ -277,7 +287,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
277287
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6
278288
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
279289
; CHECK: [[PRED_STORE_IF11]]:
280-
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6
281290
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]]
282291
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6
283292
; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1
@@ -286,7 +295,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
286295
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7
287296
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
288297
; CHECK: [[PRED_STORE_IF13]]:
289-
; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7
290298
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]]
291299
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7
292300
; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1
@@ -295,7 +303,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
295303
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8
296304
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
297305
; CHECK: [[PRED_STORE_IF15]]:
298-
; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8
299306
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]]
300307
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8
301308
; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1
@@ -304,7 +311,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
304311
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9
305312
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
306313
; CHECK: [[PRED_STORE_IF17]]:
307-
; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9
308314
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]]
309315
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9
310316
; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1
@@ -313,7 +319,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
313319
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10
314320
; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
315321
; CHECK: [[PRED_STORE_IF19]]:
316-
; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10
317322
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]]
318323
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10
319324
; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1
@@ -322,7 +327,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
322327
; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11
323328
; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
324329
; CHECK: [[PRED_STORE_IF21]]:
325-
; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11
326330
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]]
327331
; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11
328332
; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1
@@ -331,7 +335,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
331335
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12
332336
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
333337
; CHECK: [[PRED_STORE_IF23]]:
334-
; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12
335338
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]]
336339
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12
337340
; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1
@@ -340,7 +343,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
340343
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13
341344
; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
342345
; CHECK: [[PRED_STORE_IF25]]:
343-
; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13
344346
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]]
345347
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13
346348
; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1
@@ -349,7 +351,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
349351
; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14
350352
; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
351353
; CHECK: [[PRED_STORE_IF27]]:
352-
; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14
353354
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]]
354355
; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14
355356
; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1
@@ -358,7 +359,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i
358359
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
359360
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]]
360361
; CHECK: [[PRED_STORE_IF29]]:
361-
; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15
362362
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]]
363363
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15
364364
; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -996,28 +996,31 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
996996
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]]
997997
; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
998998
; TFA_INTERLEAVE: [[VECTOR_BODY]]:
999-
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
1000-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ]
1001-
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ]
999+
; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[TMP19:.*]] ]
1000+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ]
1001+
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ]
10021002
; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8
1003-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1004-
; TFA_INTERLEAVE: [[PRED_STORE_IF]]:
10051003
; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]]
1006-
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
1007-
; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true
1008-
; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00
1009-
; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8
1010-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]]
1011-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]:
1012-
; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
1013-
; TFA_INTERLEAVE: [[PRED_STORE_IF4]]:
10141004
; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]]
1005+
; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00
10151006
; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00
1016-
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true
1007+
; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor i1 [[TMP6]], true
1008+
; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor i1 [[TMP9]], true
1009+
; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP18]], i1 false
1010+
; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP20]], i1 false
10171011
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00
1018-
; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8
1019-
; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]]
1020-
; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]:
1012+
; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP21]], double 1.000000e+00, double 0.000000e+00
1013+
; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[TMP26]]
1014+
; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true
1015+
; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true
1016+
; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true
1017+
; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true
1018+
; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
1019+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]]
1020+
; TFA_INTERLEAVE: [[BB18]]:
1021+
; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8
1022+
; TFA_INTERLEAVE-NEXT: br label %[[TMP19]]
1023+
; TFA_INTERLEAVE: [[TMP19]]:
10211024
; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2
10221025
; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
10231026
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]]

0 commit comments

Comments
 (0)