@@ -2529,19 +2529,30 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
2529
2529
}
2530
2530
}
2531
2531
2532
+ // Generate code for the induction step \p Step and return it as a Value. Note that induction steps are
2533
+ // required to be loop-invariant; this is asserted only when \p OrigLoop is non-null.
2534
+ static Value *CreateStepValue (const SCEV *Step, ScalarEvolution &SE,
2535
+ Instruction *InsertBefore,
2536
+ Loop *OrigLoop = nullptr ) {
2537
+ const DataLayout &DL = SE.getDataLayout ();
2538
+ assert ((!OrigLoop || SE.isLoopInvariant (Step, OrigLoop)) &&
2539
+ " Induction step should be loop invariant" );
2540
+ if (auto *E = dyn_cast<SCEVUnknown>(Step)) // Step already wraps an IR value: return it, no expansion needed.
2541
+ return E->getValue ();
2542
+
2543
+ SCEVExpander Exp (SE, DL, " induction" );
2544
+ return Exp.expandCodeFor (Step, Step->getType (), InsertBefore); // Otherwise expand Step, keeping its own type, at InsertBefore.
2545
+ }
2546
+
2532
2547
/// Compute the transformed value of Index at offset StartValue using step
2533
2548
/// StepValue.
2534
2549
/// For integer induction, returns StartValue + Index * StepValue.
2535
2550
/// For pointer induction, returns StartValue[Index * StepValue].
2536
2551
/// FIXME: The newly created binary instructions should contain nsw/nuw
2537
2552
/// flags, which can be found from the original scalar operations.
2538
- static Value *emitTransformedIndex (IRBuilderBase &B, Value *Index,
2539
- ScalarEvolution *SE, const DataLayout &DL,
2540
- const InductionDescriptor &ID, LoopInfo &LI,
2541
- BasicBlock *VectorHeader) {
2553
+ static Value *emitTransformedIndex (IRBuilderBase &B, Value *Index, Value *Step,
2554
+ const InductionDescriptor &ID) {
2542
2555
2543
- SCEVExpander Exp (*SE, DL, " induction" );
2544
- auto Step = ID.getStep ();
2545
2556
auto StartValue = ID.getStartValue ();
2546
2557
assert (Index->getType ()->getScalarType () == Step->getType () &&
2547
2558
" Index scalar type does not match StepValue type" );
@@ -2580,39 +2591,21 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
2580
2591
return B.CreateMul (X, Y);
2581
2592
};
2582
2593
2583
- // Get a suitable insert point for SCEV expansion. For blocks in the vector
2584
- // loop, choose the end of the vector loop header (=VectorHeader), because
2585
- // the DomTree is not kept up-to-date for additional blocks generated in the
2586
- // vector loop. By using the header as insertion point, we guarantee that the
2587
- // expanded instructions dominate all their uses.
2588
- auto GetInsertPoint = [&B, &LI, VectorHeader]() {
2589
- BasicBlock *InsertBB = B.GetInsertPoint ()->getParent ();
2590
- if (InsertBB != VectorHeader &&
2591
- LI.getLoopFor (VectorHeader) == LI.getLoopFor (InsertBB))
2592
- return VectorHeader->getTerminator ();
2593
- return &*B.GetInsertPoint ();
2594
- };
2595
-
2596
2594
switch (ID.getKind ()) {
2597
2595
case InductionDescriptor::IK_IntInduction: {
2598
2596
assert (!isa<VectorType>(Index->getType ()) &&
2599
2597
" Vector indices not supported for integer inductions yet" );
2600
2598
assert (Index->getType () == StartValue->getType () &&
2601
2599
" Index type does not match StartValue type" );
2602
- if (ID. getConstIntStepValue ( ) && ID. getConstIntStepValue ( )->isMinusOne ())
2600
+ if (isa<ConstantInt>(Step ) && cast<ConstantInt>(Step )->isMinusOne ())
2603
2601
return B.CreateSub (StartValue, Index);
2604
- auto *Offset = CreateMul (
2605
- Index, Exp.expandCodeFor (Step, Index->getType (), GetInsertPoint ()));
2602
+ auto *Offset = CreateMul (Index, Step);
2606
2603
return CreateAdd (StartValue, Offset);
2607
2604
}
2608
2605
case InductionDescriptor::IK_PtrInduction: {
2609
- assert (isa<SCEVConstant >(Step) &&
2606
+ assert (isa<Constant >(Step) &&
2610
2607
" Expected constant step for pointer induction" );
2611
- return B.CreateGEP (
2612
- ID.getElementType (), StartValue,
2613
- CreateMul (Index,
2614
- Exp.expandCodeFor (Step, Index->getType ()->getScalarType (),
2615
- GetInsertPoint ())));
2608
+ return B.CreateGEP (ID.getElementType (), StartValue, CreateMul (Index, Step));
2616
2609
}
2617
2610
case InductionDescriptor::IK_FpInduction: {
2618
2611
assert (!isa<VectorType>(Index->getType ()) &&
@@ -2624,8 +2617,7 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
2624
2617
InductionBinOp->getOpcode () == Instruction::FSub) &&
2625
2618
" Original bin op should be defined for FP induction" );
2626
2619
2627
- Value *StepValue = cast<SCEVUnknown>(Step)->getValue ();
2628
- Value *MulExp = B.CreateFMul (StepValue, Index);
2620
+ Value *MulExp = B.CreateFMul (Step, Index);
2629
2621
return B.CreateBinOp (InductionBinOp->getOpcode (), StartValue, MulExp,
2630
2622
" induction" );
2631
2623
}
@@ -2676,8 +2668,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
2676
2668
NeededType->isIntegerTy ()
2677
2669
? Builder.CreateSExtOrTrunc (ScalarIV, NeededType)
2678
2670
: Builder.CreateCast (Instruction::SIToFP, ScalarIV, NeededType);
2679
- ScalarIV = emitTransformedIndex (Builder, ScalarIV, PSE.getSE (), DL, ID,
2680
- *State.LI , State.CFG .PrevBB );
2671
+ ScalarIV = emitTransformedIndex (Builder, ScalarIV, Step, ID);
2681
2672
ScalarIV->setName (" offset.idx" );
2682
2673
}
2683
2674
if (Trunc) {
@@ -3410,20 +3401,21 @@ void InnerLoopVectorizer::createInductionResumeValues(
3410
3401
Instruction::CastOps CastOp =
3411
3402
CastInst::getCastOpcode (VectorTripCount, true , StepType, true );
3412
3403
Value *CRD = B.CreateCast (CastOp, VectorTripCount, StepType, " cast.crd" );
3413
- const DataLayout &DL = LoopScalarBody-> getModule ()-> getDataLayout ();
3414
- EndValue = emitTransformedIndex (B, CRD, PSE.getSE (), DL, II, *LI,
3415
- LoopVectorBody );
3404
+ Value *Step =
3405
+ CreateStepValue (II. getStep (), * PSE.getSE (), &*B. GetInsertPoint ());
3406
+ EndValue = emitTransformedIndex (B, CRD, Step, II );
3416
3407
EndValue->setName (" ind.end" );
3417
3408
3418
3409
// Compute the end value for the additional bypass (if applicable).
3419
3410
if (AdditionalBypass.first ) {
3420
3411
B.SetInsertPoint (&(*AdditionalBypass.first ->getFirstInsertionPt ()));
3421
3412
CastOp = CastInst::getCastOpcode (AdditionalBypass.second , true ,
3422
3413
StepType, true );
3414
+ Value *Step =
3415
+ CreateStepValue (II.getStep (), *PSE.getSE (), &*B.GetInsertPoint ());
3423
3416
CRD =
3424
3417
B.CreateCast (CastOp, AdditionalBypass.second , StepType, " cast.crd" );
3425
- EndValueFromAdditionalBypass = emitTransformedIndex (
3426
- B, CRD, PSE.getSE (), DL, II, *LI, LoopVectorBody);
3418
+ EndValueFromAdditionalBypass = emitTransformedIndex (B, CRD, Step, II);
3427
3419
EndValueFromAdditionalBypass->setName (" ind.end" );
3428
3420
}
3429
3421
}
@@ -3597,8 +3589,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
3597
3589
for (User *U : OrigPhi->users ()) {
3598
3590
auto *UI = cast<Instruction>(U);
3599
3591
if (!OrigLoop->contains (UI)) {
3600
- const DataLayout &DL =
3601
- OrigLoop->getHeader ()->getModule ()->getDataLayout ();
3602
3592
assert (isa<PHINode>(UI) && " Expected LCSSA form" );
3603
3593
3604
3594
IRBuilder<> B (MiddleBlock->getTerminator ());
@@ -3615,8 +3605,10 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
3615
3605
II.getStep ()->getType ())
3616
3606
: B.CreateSExtOrTrunc (CountMinusOne, II.getStep ()->getType ());
3617
3607
CMO->setName (" cast.cmo" );
3618
- Value *Escape = emitTransformedIndex (B, CMO, PSE.getSE (), DL, II, *LI,
3619
- LoopVectorBody);
3608
+
3609
+ Value *Step = CreateStepValue (II.getStep (), *PSE.getSE (),
3610
+ LoopVectorBody->getTerminator ());
3611
+ Value *Escape = emitTransformedIndex (B, CMO, Step, II);
3620
3612
Escape->setName (" ind.escape" );
3621
3613
MissingVals[UI] = Escape;
3622
3614
}
@@ -4504,9 +4496,10 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
4504
4496
Value *Idx = Builder.CreateAdd (
4505
4497
PartStart, ConstantInt::get (PtrInd->getType (), Lane));
4506
4498
Value *GlobalIdx = Builder.CreateAdd (PtrInd, Idx);
4507
- Value *SclrGep =
4508
- emitTransformedIndex (Builder, GlobalIdx, PSE.getSE (), DL, II,
4509
- *State.LI , State.CFG .PrevBB );
4499
+
4500
+ Value *Step = CreateStepValue (II.getStep (), *PSE.getSE (),
4501
+ State.CFG .PrevBB ->getTerminator ());
4502
+ Value *SclrGep = emitTransformedIndex (Builder, GlobalIdx, Step, II);
4510
4503
SclrGep->setName (" next.gep" );
4511
4504
State.set (PhiR, SclrGep, VPIteration (Part, Lane));
4512
4505
}
0 commit comments