File tree Expand file tree Collapse file tree 2 files changed +26
-0
lines changed Expand file tree Collapse file tree 2 files changed +26
-0
lines changed Original file line number Diff line number Diff line change @@ -313,6 +313,24 @@ bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
313313 return !F || !ST->isSingleLaneExecution (*F);
314314}
315315
316+ unsigned GCNTTIImpl::getRegUsageForType (Type *Ty) {
317+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
318+ if (auto *PT = dyn_cast<PointerType>(VT->getElementType ())) {
319+ switch (PT->getAddressSpace ()) {
320+ // Assume that the resource parts of the vector being asked about are the
321+ // same.
322+ case AMDGPUAS::BUFFER_FAT_POINTER:
323+ return 4 + VT->getNumElements ();
324+ case AMDGPUAS::BUFFER_STRIDED_POINTER:
325+ return 4 + 2 * VT->getNumElements ();
326+ default :
327+ break ;
328+ }
329+ }
330+ }
331+ return BaseT::getRegUsageForType (Ty);
332+ }
333+
316334unsigned GCNTTIImpl::getNumberOfRegisters (unsigned RCID) const {
317335 // NB: RCID is not an RCID. In fact it is 0 or 1 for scalar or vector
318336 // registers. See getRegisterClassForType for the implementation.
Original file line number Diff line number Diff line change @@ -113,6 +113,14 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
113113 void getPeelingPreferences (Loop *L, ScalarEvolution &SE,
114114 TTI::PeelingPreferences &PP);
115115
116+ // Returns the number of registers needed for a value of type Ty.
117+ // Vectorization will query this for <N x ptr addrspace(7/9)>, and the default
118+ // implementation will cause crashes, so override it here. This also lets us
119+ // account for the fact that, in the context of loop vectorization (which is
120+ // what uses this API), the number of registers needed for fat pointers is
121+ // lower because they'll share a resource part.
122+ unsigned getRegUsageForType (Type *Ty);
123+
116124 TTI::PopcntSupportKind getPopcntSupport (unsigned TyWidth) {
117125 assert (isPowerOf2_32 (TyWidth) && " Ty width must be power of 2" );
118126 return TTI::PSK_FastHardware;
You can’t perform that action at this time.
0 commit comments