Skip to content

Commit fa727bc

Browse files
committed
Rework the patch, v3
1 parent 7303934 commit fa727bc

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,24 @@ bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
313313
return !F || !ST->isSingleLaneExecution(*F);
314314
}
315315

316+
// Returns a register-usage estimate for \p Ty.
//
// Only fixed-width vectors whose element type is a pointer in the
// BUFFER_FAT_POINTER or BUFFER_STRIDED_POINTER address space are costed here;
// every other type is forwarded unchanged to the base implementation.
unsigned GCNTTIImpl::getRegUsageForType(Type *Ty) {
  const auto *VecTy = dyn_cast<FixedVectorType>(Ty);
  const auto *ElemPtrTy =
      VecTy ? dyn_cast<PointerType>(VecTy->getElementType()) : nullptr;

  // Not a fixed vector of pointers: nothing target-specific to do.
  if (!ElemPtrTy)
    return BaseT::getRegUsageForType(Ty);

  const unsigned AddrSpace = ElemPtrTy->getAddressSpace();
  const unsigned NumElts = VecTy->getNumElements();

  // Assume that the resource parts of the vector being asked about are the
  // same, so the constant term is counted once for the whole vector rather
  // than once per element (presumably 4 is the register count of that shared
  // resource part -- confirm against the buffer descriptor layout).
  if (AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER)
    return 4 + NumElts;
  if (AddrSpace == AMDGPUAS::BUFFER_STRIDED_POINTER)
    return 4 + 2 * NumElts;

  // Vectors of pointers in any other address space use the generic estimate.
  return BaseT::getRegUsageForType(Ty);
}
333+
316334
unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
317335
// NB: RCID is not an RCID. In fact it is 0 or 1 for scalar or vector
318336
// registers. See getRegisterClassForType for the implementation.

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
113113
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
114114
TTI::PeelingPreferences &PP);
115115

116+
// Vectorization will query for the number of registers needed for
117+
// <N x ptr addrspace(7/9)> and the default implementation will cause crashes,
118+
// so override it here. This also lets us account for the fact that, in the
119+
// context of loop vectorization (which is what uses this API), the number of
120+
// registers needed for fat pointers is lower because they'll share a resource
121+
// part.
122+
unsigned getRegUsageForType(Type *Ty);
123+
116124
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
117125
assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
118126
return TTI::PSK_FastHardware;

0 commit comments

Comments
 (0)