@@ -818,39 +818,6 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818818 return I;
819819}
820820
821- /// Get the underlying type of a homogeneous aggregate type, or nullptr if the
822- /// type is non-homogeneous (nullptr is also returned when no leaf type exists).
823- static Type *getHomogeneousType (Type *Ty) {
824- Type *ElemTy = nullptr ;
825- SmallVector<Type *> WorkList;
826- WorkList.push_back (Ty);
827- while (!WorkList.empty ()) {
828- Type *CurTy = WorkList.pop_back_val ();
829-
830- // Aggregates (fixed vectors, arrays, structs) are unwrapped, not compared.
831- if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832- WorkList.push_back (VectorTy->getElementType ());
833- continue ;
834- }
835- if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836- WorkList.push_back (ArrayTy->getElementType ());
837- continue ;
838- }
839- if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840- WorkList.append (StructTy->element_begin (), StructTy->element_end ());
841- continue ;
842- }
843-
844- // A leaf type must be identical to every other leaf type seen so far.
845- if (!ElemTy)
846- ElemTy = CurTy;
847- else if (ElemTy != CurTy)
848- return nullptr ;
849- }
850-
851- return ElemTy;
852- }
853-
854821// FIXME: Should try to pick the most likely to be profitable allocas first.
855822bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector (AllocaInst &Alloca) {
856823 LLVM_DEBUG (dbgs () << " Trying to promote to vector: " << Alloca << ' \n ' );
@@ -861,42 +828,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
861828 }
862829
863830 Type *AllocaTy = Alloca.getAllocatedType ();
864- Type *ElemTy = getHomogeneousType (AllocaTy);
865-
866- if (!ElemTy || !VectorType::isValidElementType (ElemTy)) {
867- LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
868- return false ;
869- }
831+ auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832+ if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833+ uint64_t NumElems = 1 ;
834+ Type *ElemTy;
835+ do {
836+ NumElems *= ArrayTy->getNumElements ();
837+ ElemTy = ArrayTy->getElementType ();
838+ } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839+
840+ // Check for array of vectors
841+ auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842+ if (InnerVectorTy) {
843+ NumElems *= InnerVectorTy->getNumElements ();
844+ ElemTy = InnerVectorTy->getElementType ();
845+ }
870846
871- unsigned ElementSizeInBits = DL->getTypeSizeInBits (ElemTy);
872- if (ElementSizeInBits != DL->getTypeAllocSizeInBits (ElemTy)) {
873- LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
874- " does not match the type's size\n " );
875- return false ;
876- }
877- unsigned ElementSize = ElementSizeInBits / 8 ;
878- if (ElementSize == 0 ) {
879- LLVM_DEBUG (dbgs () << " Cannot create vector of zero-sized elements\n " );
880- return false ;
847+ if (VectorType::isValidElementType (ElemTy) && NumElems > 0 ) {
848+ unsigned ElementSize = DL->getTypeSizeInBits (ElemTy) / 8 ;
849+ if (ElementSize > 0 ) {
850+ unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
851+ // Expand vector if required to match padding of inner type,
852+ // i.e. odd size subvectors.
853+ // Storage size of new vector must match that of alloca for correct
854+ // behaviour of byte offsets and GEP computation.
855+ if (NumElems * ElementSize != AllocaSize)
856+ NumElems = AllocaSize / ElementSize;
857+ if (NumElems > 0 && (AllocaSize % ElementSize) == 0 )
858+ VectorTy = FixedVectorType::get (ElemTy, NumElems);
859+ }
860+ }
881861 }
882862
883- // Calculate the size of the corresponding vector, accounting for padding of
884- // inner types, e.g., odd-sized subvectors. Storage size of new vector must
885- // match that of alloca for correct behaviour of byte offsets and GEP
886- // computation.
887- unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
888- unsigned NumElems = AllocaSize / ElementSize;
889- if (NumElems == 0 ) {
890- LLVM_DEBUG (dbgs () << " Cannot vectorize an empty aggregate type\n " );
891- return false ;
892- }
893- if (NumElems * ElementSize != AllocaSize) {
894- LLVM_DEBUG (
895- dbgs () << " Cannot convert type into vector of the same size\n " );
863+ if (!VectorTy) {
864+ LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
896865 return false ;
897866 }
898- auto *VectorTy = FixedVectorType::get (ElemTy, NumElems);
899- assert (VectorTy && " Failed to create vector type." );
900867
901868 const unsigned MaxElements =
902869 (MaxVectorRegs * 32 ) / DL->getTypeSizeInBits (VectorTy->getElementType ());
@@ -928,6 +895,15 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
928895
929896 LLVM_DEBUG (dbgs () << " Attempting promotion to: " << *VectorTy << " \n " );
930897
898+ Type *VecEltTy = VectorTy->getElementType ();
899+ unsigned ElementSizeInBits = DL->getTypeSizeInBits (VecEltTy);
900+ if (ElementSizeInBits != DL->getTypeAllocSizeInBits (VecEltTy)) {
901+ LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
902+ " does not match the type's size\n " );
903+ return false ;
904+ }
905+ unsigned ElementSize = ElementSizeInBits / 8 ;
906+ assert (ElementSize > 0 );
931907 for (auto *U : Uses) {
932908 Instruction *Inst = cast<Instruction>(U->getUser ());
933909
@@ -967,7 +943,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
967943 if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
968944 // If we can't compute a vector index from this GEP, then we can't
969945 // promote this alloca to vector.
970- Value *Index = GEPToVectorIndex (GEP, &Alloca, ElemTy , *DL, NewGEPInsts);
946+ Value *Index = GEPToVectorIndex (GEP, &Alloca, VecEltTy , *DL, NewGEPInsts);
971947 if (!Index)
972948 return RejectUser (Inst, " cannot compute vector index for GEP" );
973949
0 commit comments