Skip to content

Commit c0f7873

Browse files
committed
AMDGPU/PromoteAlloca: Simplify how deferred loads work
The second pass of promotion to vector can be quite simple. Reflect that simplicity in the code for better maintainability. commit-id:cbc1e9ae
1 parent a443460 commit c0f7873

File tree

1 file changed

+34
-46
lines changed

1 file changed

+34
-46
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 34 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -502,27 +502,14 @@ static Value *promoteAllocaUserToVector(
502502
Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
503503
unsigned VecStoreSize, unsigned ElementSize,
504504
DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
505-
std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
506-
SmallVectorImpl<LoadInst *> &DeferredLoads) {
505+
std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
506+
function_ref<Value *()> GetCurVal) {
507507
// Note: we use InstSimplifyFolder because it can leverage the DataLayout
508508
// to do more folding, especially in the case of vector splats.
509509
IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
510510
InstSimplifyFolder(DL));
511511
Builder.SetInsertPoint(Inst);
512512

513-
const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
514-
if (CurVal)
515-
return CurVal;
516-
517-
// If the current value is not known, insert a dummy load and lower it on
518-
// the second pass.
519-
LoadInst *Dummy =
520-
Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
521-
"promotealloca.dummyload");
522-
DeferredLoads.push_back(Dummy);
523-
return Dummy;
524-
};
525-
526513
const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
527514
Type *PtrTy) -> Value * {
528515
assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
@@ -542,12 +529,7 @@ static Value *promoteAllocaUserToVector(
542529

543530
switch (Inst->getOpcode()) {
544531
case Instruction::Load: {
545-
// Loads can only be lowered if the value is known.
546-
if (!CurVal) {
547-
DeferredLoads.push_back(cast<LoadInst>(Inst));
548-
return nullptr;
549-
}
550-
532+
Value *CurVal = GetCurVal();
551533
Value *Index = calculateVectorIndex(
552534
cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
553535

@@ -637,7 +619,7 @@ static Value *promoteAllocaUserToVector(
637619

638620
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
639621

640-
Value *CurVec = GetOrLoadCurrentVectorValue();
622+
Value *CurVec = GetCurVal();
641623
for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
642624
K < NumElts; ++K) {
643625
Value *CurIdx =
@@ -650,8 +632,7 @@ static Value *promoteAllocaUserToVector(
650632

651633
if (Val->getType() != VecEltTy)
652634
Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
653-
return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
654-
Index);
635+
return Builder.CreateInsertElement(GetCurVal(), Val, Index);
655636
}
656637
case Instruction::Call: {
657638
if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -673,7 +654,7 @@ static Value *promoteAllocaUserToVector(
673654
}
674655
}
675656

676-
return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
657+
return Builder.CreateShuffleVector(GetCurVal(), Mask);
677658
}
678659

679660
if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1038,37 +1019,44 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
10381019

10391020
Updater.AddAvailableValue(EntryBB, AllocaInitValue);
10401021

1041-
// First handle the initial worklist.
1042-
SmallVector<LoadInst *, 4> DeferredLoads;
1022+
// First handle the initial worklist, in basic block order.
1023+
//
1024+
// Insert a placeholder whenever we need the vector value at the top of a
1025+
// basic block.
1026+
SmallVector<Instruction *> Placeholders;
10431027
forEachWorkListItem(WorkList, [&](Instruction *I) {
10441028
BasicBlock *BB = I->getParent();
1045-
// On the first pass, we only take values that are trivially known, i.e.
1046-
// where AddAvailableValue was already called in this block.
1047-
Value *Result = promoteAllocaUserToVector(
1048-
I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1049-
Updater.FindValueForBlock(BB), DeferredLoads);
1029+
auto GetCurVal = [&]() -> Value * {
1030+
if (Value *CurVal = Updater.FindValueForBlock(BB))
1031+
return CurVal;
1032+
1033+
// If the current value in the basic block is not yet known, insert a
1034+
// placeholder that we will replace later.
1035+
IRBuilder<> Builder(I);
1036+
auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
1037+
PoisonValue::get(VectorTy), "promotealloca.placeholder"));
1038+
Placeholders.push_back(Placeholder);
1039+
Updater.AddAvailableValue(BB, Placeholder);
1040+
return Placeholder;
1041+
};
1042+
1043+
Value *Result =
1044+
promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize, ElementSize,
1045+
TransferInfo, GEPVectorIdx, GetCurVal);
10501046
if (Result)
10511047
Updater.AddAvailableValue(BB, Result);
10521048
});
10531049

1054-
// Then handle deferred loads.
1055-
forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
1056-
SmallVector<LoadInst *, 0> NewDLs;
1057-
BasicBlock *BB = I->getParent();
1058-
// On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
1059-
// get a value, inserting PHIs as needed.
1060-
Value *Result = promoteAllocaUserToVector(
1061-
I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1062-
Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
1063-
if (Result)
1064-
Updater.AddAvailableValue(BB, Result);
1065-
assert(NewDLs.empty() && "No more deferred loads should be queued!");
1066-
});
1050+
// Now fixup the placeholders.
1051+
for (Instruction *Placeholder : Placeholders) {
1052+
Placeholder->replaceAllUsesWith(
1053+
Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
1054+
Placeholder->eraseFromParent();
1055+
}
10671056

10681057
// Delete all instructions. On the first pass, new dummy loads may have been
10691058
// added so we need to collect them too.
10701059
DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
1071-
InstsToDelete.insert_range(DeferredLoads);
10721060
for (Instruction *I : InstsToDelete) {
10731061
assert(I->use_empty());
10741062
I->eraseFromParent();

0 commit comments

Comments
 (0)