@@ -502,14 +502,27 @@ static Value *promoteAllocaUserToVector(
502502 Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
503503 unsigned VecStoreSize, unsigned ElementSize,
504504 DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
505- std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
506- function_ref<Value *()> GetCurVal ) {
505+ std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
506+ SmallVectorImpl<LoadInst *> &DeferredLoads ) {
507507 // Note: we use InstSimplifyFolder because it can leverage the DataLayout
508508 // to do more folding, especially in the case of vector splats.
509509 IRBuilder<InstSimplifyFolder> Builder (Inst->getContext (),
510510 InstSimplifyFolder (DL));
511511 Builder.SetInsertPoint (Inst);
512512
513+ const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
514+ if (CurVal)
515+ return CurVal;
516+
517+ // If the current value is not known, insert a dummy load and lower it on
518+ // the second pass.
519+ LoadInst *Dummy =
520+ Builder.CreateLoad (VectorTy, PoisonValue::get (Builder.getPtrTy ()),
521+ " promotealloca.dummyload" );
522+ DeferredLoads.push_back (Dummy);
523+ return Dummy;
524+ };
525+
513526 const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
514527 Type *PtrTy) -> Value * {
515528 assert (DL.getTypeStoreSize (Val->getType ()) == DL.getTypeStoreSize (PtrTy));
@@ -529,7 +542,12 @@ static Value *promoteAllocaUserToVector(
529542
530543 switch (Inst->getOpcode ()) {
531544 case Instruction::Load: {
532- Value *CurVal = GetCurVal ();
545+ // Loads can only be lowered if the value is known.
546+ if (!CurVal) {
547+ DeferredLoads.push_back (cast<LoadInst>(Inst));
548+ return nullptr ;
549+ }
550+
533551 Value *Index = calculateVectorIndex (
534552 cast<LoadInst>(Inst)->getPointerOperand (), GEPVectorIdx);
535553
@@ -619,7 +637,7 @@ static Value *promoteAllocaUserToVector(
619637
620638 Val = Builder.CreateBitOrPointerCast (Val, SubVecTy);
621639
622- Value *CurVec = GetCurVal ();
640+ Value *CurVec = GetOrLoadCurrentVectorValue ();
623641 for (unsigned K = 0 , NumElts = std::min (NumWrittenElts, NumVecElts);
624642 K < NumElts; ++K) {
625643 Value *CurIdx =
@@ -632,7 +650,8 @@ static Value *promoteAllocaUserToVector(
632650
633651 if (Val->getType () != VecEltTy)
634652 Val = Builder.CreateBitOrPointerCast (Val, VecEltTy);
635- return Builder.CreateInsertElement (GetCurVal (), Val, Index);
653+ return Builder.CreateInsertElement (GetOrLoadCurrentVectorValue (), Val,
654+ Index);
636655 }
637656 case Instruction::Call: {
638657 if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -654,7 +673,7 @@ static Value *promoteAllocaUserToVector(
654673 }
655674 }
656675
657- return Builder.CreateShuffleVector (GetCurVal (), Mask);
676+ return Builder.CreateShuffleVector (GetOrLoadCurrentVectorValue (), Mask);
658677 }
659678
660679 if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1019,44 +1038,37 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
10191038
10201039 Updater.AddAvailableValue (EntryBB, AllocaInitValue);
10211040
1022- // First handle the initial worklist, in basic block order.
1023- //
1024- // Insert a placeholder whenever we need the vector value at the top of a
1025- // basic block.
1026- SmallVector<Instruction *> Placeholders;
1041+ // First handle the initial worklist.
1042+ SmallVector<LoadInst *, 4 > DeferredLoads;
10271043 forEachWorkListItem (WorkList, [&](Instruction *I) {
10281044 BasicBlock *BB = I->getParent ();
1029- auto GetCurVal = [&]() -> Value * {
1030- if (Value *CurVal = Updater.FindValueForBlock (BB))
1031- return CurVal;
1032-
1033- // If the current value in the basic block is not yet known, insert a
1034- // placeholder that we will replace later.
1035- IRBuilder<> Builder (I);
1036- auto *Placeholder = cast<Instruction>(Builder.CreateFreeze (
1037- PoisonValue::get (VectorTy), " promotealloca.placeholder" ));
1038- Placeholders.push_back (Placeholder);
1039- Updater.AddAvailableValue (BB, Placeholder);
1040- return Placeholder;
1041- };
1042-
1043- Value *Result =
1044- promoteAllocaUserToVector (I, *DL, VectorTy, VecStoreSize, ElementSize,
1045- TransferInfo, GEPVectorIdx, GetCurVal);
1045+ // On the first pass, we only take values that are trivially known, i.e.
1046+ // where AddAvailableValue was already called in this block.
1047+ Value *Result = promoteAllocaUserToVector (
1048+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1049+ Updater.FindValueForBlock (BB), DeferredLoads);
10461050 if (Result)
10471051 Updater.AddAvailableValue (BB, Result);
10481052 });
10491053
1050- // Now fixup the placeholders.
1051- for (Instruction *Placeholder : Placeholders) {
1052- Placeholder->replaceAllUsesWith (
1053- Updater.GetValueInMiddleOfBlock (Placeholder->getParent ()));
1054- Placeholder->eraseFromParent ();
1055- }
1054+ // Then handle deferred loads.
1055+ forEachWorkListItem (DeferredLoads, [&](Instruction *I) {
1056+ SmallVector<LoadInst *, 0 > NewDLs;
1057+ BasicBlock *BB = I->getParent ();
1058+ // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
1059+ // get a value, inserting PHIs as needed.
1060+ Value *Result = promoteAllocaUserToVector (
1061+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1062+ Updater.GetValueInMiddleOfBlock (I->getParent ()), NewDLs);
1063+ if (Result)
1064+ Updater.AddAvailableValue (BB, Result);
1065+ assert (NewDLs.empty () && " No more deferred loads should be queued!" );
1066+ });
10561067
10571068 // Delete all instructions. On the first pass, new dummy loads may have been
10581069 // added so we need to collect them too.
10591070 DenseSet<Instruction *> InstsToDelete (WorkList.begin (), WorkList.end ());
1071+ InstsToDelete.insert_range (DeferredLoads);
10601072 for (Instruction *I : InstsToDelete) {
10611073 assert (I->use_empty ());
10621074 I->eraseFromParent ();
0 commit comments