41 changes: 36 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}

static bool hasVariableOffset(GetElementPtrInst *GEP) {
Contributor:
This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases in which it could occur.

Contributor Author:
It will make the code future-proof. Will make the change.

Contributor Author (@sgundapa, Jan 14, 2025):
> This looks like you're pre-filtering for a scenario that could happen in the map lookup. You should directly detect when this delete happens rather than assuming the set of cases in which it could occur.

The deletion of this instruction happens after the transformation is done. In the attached lit test, the dangling use of the instruction in the cached Index is a result of replaceAllUsesWith() in promoteAllocaUserToVector(). Please advise. I will make this code generic (not for a specific scenario).
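A minimal sketch of the failure mode described above (our illustration, not the pass's code, and the helper name is hypothetical): replaceAllUsesWith() rewrites the IR uses of a value, but a side map that caches the old Value* is not updated, so the cached entry dangles once the old instruction is erased.

#include <map>
#include "llvm/IR/Instructions.h"

// Hypothetical helper showing how a cached map entry goes stale after RAUW.
static void strandCachedIndex(
    llvm::GetElementPtrInst *GEP, llvm::Instruction *OldIdx,
    llvm::Value *NewIdx,
    std::map<llvm::GetElementPtrInst *, llvm::Value *> &GEPIdx) {
  GEPIdx[GEP] = OldIdx;               // cache the GEP's vector index
  OldIdx->replaceAllUsesWith(NewIdx); // IR uses now point at NewIdx...
  OldIdx->eraseFromParent();          // ...but GEPIdx[GEP] still holds the
                                      // erased OldIdx and now dangles.
}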

Contributor:
One possible solution is to track these values with WeakVH. Then you'll just have to check if the value was already deleted before deleting it. This appears to be how SROA handles it (see how DeadInsts is a vector of WeakVH).
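For reference, a minimal sketch of the SROA-style pattern suggested here (assumed shape, not this pass's code): a WeakVH reads back as null once its Value is deleted, so entries that were already erased elsewhere can be skipped safely.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ValueHandle.h"

// Erase queued instructions, skipping any that were already deleted
// elsewhere; the WeakVH for those reads back as null.
static void deleteDeadInstructions(
    llvm::SmallVectorImpl<llvm::WeakVH> &DeadInsts) {
  while (!DeadInsts.empty()) {
    auto *I = llvm::dyn_cast_or_null<llvm::Instruction>(
        DeadInsts.pop_back_val());
    if (!I)
      continue;
    I->eraseFromParent();
  }
}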

Contributor Author:
Thanks Matt. I will look into WeakVH.

Contributor Author:
WeakVH might not be useful here, as the Value still exists in the code. We can do a DCE, but I am not sure it is guaranteed even if the Value has no uses in the code.

Contributor:
You can easily check if the operation is dead. This pass is essentially doing the same thing as SROA, which checks isInstructionTriviallyDead.
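A sketch of that dead-check (the helper name below is ours, not the pass's): isInstructionTriviallyDead() from llvm/Transforms/Utils/Local.h returns true when an instruction has no uses and no side effects, so a cached index can be vetted before it is trusted or erased.

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

// Returns true if a cached index value is an instruction that has become
// trivially dead (no uses, no side effects) and may therefore be erased.
static bool cachedIndexIsDead(llvm::Value *CachedIdx) {
  auto *I = llvm::dyn_cast<llvm::Instruction>(CachedIdx);
  return I && llvm::isInstructionTriviallyDead(I);
}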

// Iterate over all index operands (operand 0 is the base pointer).
for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
Value *Op = GEP->getOperand(i);
// Check if the operand is not a constant integer value
if (!isa<ConstantInt>(Op)) {
return true;
}
}
return false;
}

static Value *
-calculateVectorIndex(Value *Ptr,
-                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+                     const DataLayout &DL) {
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
if (!GEP)
return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));

// If the index of this GEP is a variable that might be deleted,
// update the index with its latest value. We've already handled any GEPs
// with unsupported index types (in GEPToVectorIndex) at this point.
if (hasVariableOffset(GEP)) {
unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
SmallMapVector<Value *, APInt, 4> VarOffsets;
APInt ConstOffset(BW, 0);
if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
auto *UpdatedValue = VarOffsets.front().first;
GEPIdx[GEP] = UpdatedValue;
return UpdatedValue;
}
}
}

auto I = GEPIdx.find(GEP);
assert(I != GEPIdx.end() && "Must have entry for GEP!");
return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
}

Value *Index = calculateVectorIndex(
-    cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+    cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);

// We're loading the full vector.
Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
// to know the current value. If this is a store of a single element, we
// need to know the value.
StoreInst *SI = cast<StoreInst>(Inst);
-Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+Value *Index =
+    calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
Value *Val = SI->getValueOperand();

// We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
return nullptr;

-return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+return dyn_cast<ConstantInt>(
+    calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
};

unsigned OpNum = U->getOperandNo();
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
ret void
}

%Block4 = type { [2 x i32], i32 }
@block4 = external addrspace(1) global %Block4
%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
@pv1 = external addrspace(1) global %gl_PV

; This should not crash on a variable offset that can be
; optimized out (variable foo4 in the test)
define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
Contributor:
Suggested change:
-define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() {

Contributor Author:
Sure

; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
; CHECK-NEXT: ret void
;
%f1 = alloca [3 x i32], align 4, addrspace(5)
%G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
%foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
%foo4 = load i32, ptr addrspace(5) %G1, align 4
Contributor:
Give a better name to indicate this is the interesting value.

Contributor Author:
Sure

%foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
%foo6 = load i32, ptr addrspace(5) %foo5, align 4
%foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
ret void
}

define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
Expand Down