Skip to content

Commit 0e0ec4c

Browse files
committed
Revert "AMDGPU/PromoteAlloca: Simplify how deferred loads work (llvm#170510)"
This reverts commit 22a2c27. Failure on clang-hip-vega20: https://lab.llvm.org/buildbot/#/builders/123/builds/31779
1 parent de86696 commit 0e0ec4c

File tree

1 file changed

+46
-34
lines changed

1 file changed

+46
-34
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -502,14 +502,27 @@ static Value *promoteAllocaUserToVector(
502502
Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
503503
unsigned VecStoreSize, unsigned ElementSize,
504504
DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
505-
std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
506-
function_ref<Value *()> GetCurVal) {
505+
std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
506+
SmallVectorImpl<LoadInst *> &DeferredLoads) {
507507
// Note: we use InstSimplifyFolder because it can leverage the DataLayout
508508
// to do more folding, especially in the case of vector splats.
509509
IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
510510
InstSimplifyFolder(DL));
511511
Builder.SetInsertPoint(Inst);
512512

513+
const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
514+
if (CurVal)
515+
return CurVal;
516+
517+
// If the current value is not known, insert a dummy load and lower it on
518+
// the second pass.
519+
LoadInst *Dummy =
520+
Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
521+
"promotealloca.dummyload");
522+
DeferredLoads.push_back(Dummy);
523+
return Dummy;
524+
};
525+
513526
const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
514527
Type *PtrTy) -> Value * {
515528
assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
@@ -529,7 +542,12 @@ static Value *promoteAllocaUserToVector(
529542

530543
switch (Inst->getOpcode()) {
531544
case Instruction::Load: {
532-
Value *CurVal = GetCurVal();
545+
// Loads can only be lowered if the value is known.
546+
if (!CurVal) {
547+
DeferredLoads.push_back(cast<LoadInst>(Inst));
548+
return nullptr;
549+
}
550+
533551
Value *Index = calculateVectorIndex(
534552
cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
535553

@@ -619,7 +637,7 @@ static Value *promoteAllocaUserToVector(
619637

620638
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
621639

622-
Value *CurVec = GetCurVal();
640+
Value *CurVec = GetOrLoadCurrentVectorValue();
623641
for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
624642
K < NumElts; ++K) {
625643
Value *CurIdx =
@@ -632,7 +650,8 @@ static Value *promoteAllocaUserToVector(
632650

633651
if (Val->getType() != VecEltTy)
634652
Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
635-
return Builder.CreateInsertElement(GetCurVal(), Val, Index);
653+
return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
654+
Index);
636655
}
637656
case Instruction::Call: {
638657
if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -654,7 +673,7 @@ static Value *promoteAllocaUserToVector(
654673
}
655674
}
656675

657-
return Builder.CreateShuffleVector(GetCurVal(), Mask);
676+
return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
658677
}
659678

660679
if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1019,44 +1038,37 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
10191038

10201039
Updater.AddAvailableValue(EntryBB, AllocaInitValue);
10211040

1022-
// First handle the initial worklist, in basic block order.
1023-
//
1024-
// Insert a placeholder whenever we need the vector value at the top of a
1025-
// basic block.
1026-
SmallVector<Instruction *> Placeholders;
1041+
// First handle the initial worklist.
1042+
SmallVector<LoadInst *, 4> DeferredLoads;
10271043
forEachWorkListItem(WorkList, [&](Instruction *I) {
10281044
BasicBlock *BB = I->getParent();
1029-
auto GetCurVal = [&]() -> Value * {
1030-
if (Value *CurVal = Updater.FindValueForBlock(BB))
1031-
return CurVal;
1032-
1033-
// If the current value in the basic block is not yet known, insert a
1034-
// placeholder that we will replace later.
1035-
IRBuilder<> Builder(I);
1036-
auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
1037-
PoisonValue::get(VectorTy), "promotealloca.placeholder"));
1038-
Placeholders.push_back(Placeholder);
1039-
Updater.AddAvailableValue(BB, Placeholder);
1040-
return Placeholder;
1041-
};
1042-
1043-
Value *Result =
1044-
promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize, ElementSize,
1045-
TransferInfo, GEPVectorIdx, GetCurVal);
1045+
// On the first pass, we only take values that are trivially known, i.e.
1046+
// where AddAvailableValue was already called in this block.
1047+
Value *Result = promoteAllocaUserToVector(
1048+
I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1049+
Updater.FindValueForBlock(BB), DeferredLoads);
10461050
if (Result)
10471051
Updater.AddAvailableValue(BB, Result);
10481052
});
10491053

1050-
// Now fixup the placeholders.
1051-
for (Instruction *Placeholder : Placeholders) {
1052-
Placeholder->replaceAllUsesWith(
1053-
Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
1054-
Placeholder->eraseFromParent();
1055-
}
1054+
// Then handle deferred loads.
1055+
forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
1056+
SmallVector<LoadInst *, 0> NewDLs;
1057+
BasicBlock *BB = I->getParent();
1058+
// On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
1059+
// get a value, inserting PHIs as needed.
1060+
Value *Result = promoteAllocaUserToVector(
1061+
I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1062+
Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
1063+
if (Result)
1064+
Updater.AddAvailableValue(BB, Result);
1065+
assert(NewDLs.empty() && "No more deferred loads should be queued!");
1066+
});
10561067

10571068
// Delete all instructions. On the first pass, new dummy loads may have been
10581069
// added so we need to collect them too.
10591070
DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
1071+
InstsToDelete.insert_range(DeferredLoads);
10601072
for (Instruction *I : InstsToDelete) {
10611073
assert(I->use_empty());
10621074
I->eraseFromParent();

0 commit comments

Comments
 (0)