@@ -344,9 +344,12 @@ unsigned GCNTTIImpl::getMinVectorRegisterBitWidth() const {
344344unsigned  GCNTTIImpl::getMaximumVF (unsigned  ElemWidth, unsigned  Opcode) const  {
345345  if  (Opcode == Instruction::Load || Opcode == Instruction::Store)
346346    return  32  * 4  / ElemWidth;  // 128 bits divided by the element width
347-   return  (ElemWidth == 16  && ST->has16BitInsts ()) ? 2 
348-        : (ElemWidth == 32  && ST->hasPackedFP32Ops ()) ? 2 
349-        : 1 ;
347+   //  For a given element width, return the max number of elements that can be
348+   //  combined into a wider bit value:
349+   return  ElemWidth == 8                                 ? 4 
350+          : (ElemWidth == 16  && ST->has16BitInsts ())    ? 2 
351+          : (ElemWidth == 32  && ST->hasPackedFP32Ops ()) ? 2 
352+                                                        : 1 ;
350353}
351354
352355unsigned  GCNTTIImpl::getLoadVectorFactor (unsigned  VF, unsigned  LoadSize,
@@ -1422,3 +1425,31 @@ void GCNTTIImpl::collectKernelLaunchBounds(
14221425  LB.push_back ({" amdgpu-waves-per-eu[0]"  , WavesPerEU.first });
14231426  LB.push_back ({" amdgpu-waves-per-eu[1]"  , WavesPerEU.second });
14241427}
1428+ 
1429+ InstructionCost GCNTTIImpl::getMemoryOpCost (unsigned  Opcode, Type *Src,
1430+                                             Align Alignment,
1431+                                             unsigned  AddressSpace,
1432+                                             TTI::TargetCostKind CostKind,
1433+                                             TTI::OperandValueInfo OpInfo,
1434+                                             const  Instruction *I) const  {
1435+   if  (VectorType *VecTy = dyn_cast<VectorType>(Src)) {
1436+     if  ((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1437+         VecTy->getElementType ()->isIntegerTy (8 )) {
1438+       return  ((DL.getTypeSizeInBits (VecTy) - 1 ) /
1439+               getLoadStoreVecRegBitWidth (AddressSpace)) +
1440+              1 ;
1441+     }
1442+   }
1443+   return  BaseT::getMemoryOpCost (Opcode, Src, Alignment, AddressSpace, CostKind,
1444+                                 OpInfo, I);
1445+ }
1446+ 
1447+ unsigned  GCNTTIImpl::getNumberOfParts (Type *Tp) const  {
1448+   if  (VectorType *VecTy = dyn_cast<VectorType>(Tp)) {
1449+     if  (VecTy->getElementType ()->isIntegerTy (8 )) {
1450+       unsigned  ElementCount = VecTy->getElementCount ().getFixedValue ();
1451+       return  ((ElementCount - 1 ) / 4 ) + 1 ;
1452+     }
1453+   }
1454+   return  BaseT::getNumberOfParts (Tp);
1455+ }
0 commit comments