@@ -818,6 +818,39 @@ static BasicBlock::iterator skipToNonAllocaInsertPt(BasicBlock &BB,
818818 return I;
819819}
820820
821+ /// Get the underlying type of a homogeneous aggregate type, or nullptr if the
822+ /// type is non-homogeneous. Nested fixed vectors, arrays and structs are flattened down to their leaf types; a non-aggregate \p Ty is returned as-is.
823+ static Type *getHomogeneousType (Type *Ty) {
824+ Type *ElemTy = nullptr ; // The single leaf type seen so far; nullptr until the first leaf is reached.
825+ SmallVector<Type *> WorkList; // Types still to be inspected; an explicit stack is used instead of recursion.
826+ WorkList.push_back (Ty);
827+ while (!WorkList.empty ()) {
828+ Type *CurTy = WorkList.pop_back_val ();
829+
830+ // Check if the current type is an aggregate type. If so, queue its element type(s) for inspection instead of comparing the aggregate itself.
831+ if (auto *VectorTy = dyn_cast<FixedVectorType>(CurTy)) {
832+ WorkList.push_back (VectorTy->getElementType ()); // One entry is enough: all lanes share the same element type.
833+ continue ;
834+ }
835+ if (auto *ArrayTy = dyn_cast<ArrayType>(CurTy)) {
836+ WorkList.push_back (ArrayTy->getElementType ()); // The element count is not inspected here; only the element type matters.
837+ continue ;
838+ }
839+ if (auto *StructTy = dyn_cast<StructType>(CurTy)) {
840+ WorkList.append (StructTy->element_begin (), StructTy->element_end ()); // Members may differ from each other, so every one is queued.
841+ continue ;
842+ }
843+
844+ // If not, CurTy is a leaf: it must be the same as all other non-aggregate types seen so far.
845+ if (!ElemTy)
846+ ElemTy = CurTy; // The first leaf becomes the reference type.
847+ else if (ElemTy != CurTy)
848+ return nullptr ; // A differing leaf type means the aggregate is not homogeneous.
849+ }
850+
851+ return ElemTy; // Still nullptr if no leaf was ever reached (e.g. a struct with no elements).
852+ }
853+
821854// FIXME: Should try to pick the most likely to be profitable allocas first.
822855bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector (AllocaInst &Alloca) {
823856 LLVM_DEBUG (dbgs () << " Trying to promote to vector: " << Alloca << ' \n ' );
@@ -828,42 +861,42 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
828861 }
829862
830863 Type *AllocaTy = Alloca.getAllocatedType ();
831- auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
832- if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
833- uint64_t NumElems = 1 ;
834- Type *ElemTy;
835- do {
836- NumElems *= ArrayTy->getNumElements ();
837- ElemTy = ArrayTy->getElementType ();
838- } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
839-
840- // Check for array of vectors
841- auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
842- if (InnerVectorTy) {
843- NumElems *= InnerVectorTy->getNumElements ();
844- ElemTy = InnerVectorTy->getElementType ();
845- }
864+ Type *ElemTy = getHomogeneousType (AllocaTy);
846865
847- if (VectorType::isValidElementType (ElemTy) && NumElems > 0 ) {
848- unsigned ElementSize = DL->getTypeSizeInBits (ElemTy) / 8 ;
849- if (ElementSize > 0 ) {
850- unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
851- // Expand vector if required to match padding of inner type,
852- // i.e. odd size subvectors.
853- // Storage size of new vector must match that of alloca for correct
854- // behaviour of byte offsets and GEP computation.
855- if (NumElems * ElementSize != AllocaSize)
856- NumElems = AllocaSize / ElementSize;
857- if (NumElems > 0 && (AllocaSize % ElementSize) == 0 )
858- VectorTy = FixedVectorType::get (ElemTy, NumElems);
859- }
860- }
866+ if (!ElemTy || !VectorType::isValidElementType (ElemTy)) {
867+ LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
868+ return false ;
861869 }
862870
863- if (!VectorTy) {
864- LLVM_DEBUG (dbgs () << " Cannot convert type to vector\n " );
871+ unsigned ElementSizeInBits = DL->getTypeSizeInBits (ElemTy);
872+ if (ElementSizeInBits != DL->getTypeAllocSizeInBits (ElemTy)) {
873+ LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
874+ " does not match the type's size\n " );
875+ return false ;
876+ }
877+ unsigned ElementSize = ElementSizeInBits / 8 ;
878+ if (ElementSize == 0 ) {
879+ LLVM_DEBUG (dbgs () << " Cannot create vector of zero-sized elements\n " );
880+ return false ;
881+ }
882+
883+ // Calculate the size of the corresponding vector, accounting for padding of
884+ // inner types, e.g., odd-sized subvectors. Storage size of new vector must
885+ // match that of alloca for correct behaviour of byte offsets and GEP
886+ // computation.
887+ unsigned AllocaSize = DL->getTypeStoreSize (AllocaTy);
888+ unsigned NumElems = AllocaSize / ElementSize;
889+ if (NumElems == 0 ) {
890+ LLVM_DEBUG (dbgs () << " Cannot vectorize an empty aggregate type\n " );
865891 return false ;
866892 }
893+ if (NumElems * ElementSize != AllocaSize) {
894+ LLVM_DEBUG (
895+ dbgs () << " Cannot convert type into vector of the same size\n " );
896+ return false ;
897+ }
898+ auto *VectorTy = FixedVectorType::get (ElemTy, NumElems);
899+ assert (VectorTy && " Failed to create vector type." );
867900
868901 const unsigned MaxElements =
869902 (MaxVectorRegs * 32 ) / DL->getTypeSizeInBits (VectorTy->getElementType ());
@@ -895,15 +928,6 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
895928
896929 LLVM_DEBUG (dbgs () << " Attempting promotion to: " << *VectorTy << " \n " );
897930
898- Type *VecEltTy = VectorTy->getElementType ();
899- unsigned ElementSizeInBits = DL->getTypeSizeInBits (VecEltTy);
900- if (ElementSizeInBits != DL->getTypeAllocSizeInBits (VecEltTy)) {
901- LLVM_DEBUG (dbgs () << " Cannot convert to vector if the allocation size "
902- " does not match the type's size\n " );
903- return false ;
904- }
905- unsigned ElementSize = ElementSizeInBits / 8 ;
906- assert (ElementSize > 0 );
907931 for (auto *U : Uses) {
908932 Instruction *Inst = cast<Instruction>(U->getUser ());
909933
@@ -943,7 +967,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
943967 if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
944968 // If we can't compute a vector index from this GEP, then we can't
945969 // promote this alloca to vector.
946- Value *Index = GEPToVectorIndex (GEP, &Alloca, VecEltTy , *DL, NewGEPInsts);
970+ Value *Index = GEPToVectorIndex (GEP, &Alloca, ElemTy , *DL, NewGEPInsts);
947971 if (!Index)
948972 return RejectUser (Inst, " cannot compute vector index for GEP" );
949973
0 commit comments