@@ -605,7 +605,7 @@ namespace {
605605// / 1. Recursing into structs (and arrays that don't share a memory layout with
606606// / vectors) since the intrinsics can't handle complex types.
607607// / 2. Converting arrays of non-aggregate, byte-sized types into their
608- // / correspondinng vectors
608+ // / corresponding vectors
609609// / 3. Bitcasting unsupported types, namely overly-long scalars and byte
610610// / vectors, into vectors of supported types.
611611// / 4. Splitting up excessively long reads/writes into multiple operations.
@@ -727,10 +727,7 @@ Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
727727 // Implicitly zero-extend to the next byte if needed
728728 if (!DL.typeSizeEqualsStoreSize (T))
729729 T = IRB.getIntNTy (Size.getFixedValue ());
730- Type *ElemTy = T;
731- if (auto *VT = dyn_cast<FixedVectorType>(T)) {
732- ElemTy = VT->getElementType ();
733- }
730+ Type *ElemTy = T->getScalarType ();
734731 if (isa<PointerType, ScalableVectorType>(ElemTy))
735732 // Pointers are always big enough, and we'll let scalable vectors through to
736733 // fail in codegen.
@@ -758,11 +755,11 @@ Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) {
758755Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate (
759756 Value *V, Type *TargetType, const Twine &Name) {
760757 Type *SourceType = V->getType ();
761- if (DL. getTypeSizeInBits (SourceType) ! = DL.getTypeSizeInBits (TargetType)) {
762- Type *ShortScalarTy =
763- IRB. getIntNTy (DL. getTypeSizeInBits (SourceType). getFixedValue ());
764- Type *ByteScalarTy =
765- IRB.getIntNTy (DL. getTypeSizeInBits (TargetType) .getFixedValue ());
758+ TypeSize SourceSize = DL.getTypeSizeInBits (SourceType);
759+ TypeSize TargetSize = DL. getTypeSizeInBits (TargetType);
760+ if (SourceSize != TargetSize) {
761+ Type *ShortScalarTy = IRB. getIntNTy (SourceSize. getFixedValue ());
762+ Type *ByteScalarTy = IRB.getIntNTy (TargetSize .getFixedValue ());
766763 Value *AsScalar = IRB.CreateBitCast (V, ShortScalarTy, Name + " .as.scalar" );
767764 Value *Zext = IRB.CreateZExt (AsScalar, ByteScalarTy, Name + " .zext" );
768765 V = Zext;
@@ -774,11 +771,11 @@ Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate(
774771Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate (
775772 Value *V, Type *OrigType, const Twine &Name) {
776773 Type *LegalType = V->getType ();
777- if (DL. getTypeSizeInBits (LegalType) ! = DL.getTypeSizeInBits (OrigType)) {
778- Type *ShortScalarTy =
779- IRB. getIntNTy (DL. getTypeSizeInBits (OrigType). getFixedValue ());
780- Type *ByteScalarTy =
781- IRB.getIntNTy (DL. getTypeSizeInBits (LegalType) .getFixedValue ());
774+ TypeSize LegalSize = DL.getTypeSizeInBits (LegalType);
775+ TypeSize OrigSize = DL. getTypeSizeInBits (OrigType);
776+ if (LegalSize != OrigSize) {
777+ Type *ShortScalarTy = IRB. getIntNTy (OrigSize. getFixedValue ());
778+ Type *ByteScalarTy = IRB.getIntNTy (LegalSize .getFixedValue ());
782779 Value *AsScalar = IRB.CreateBitCast (V, ByteScalarTy, Name + " .bytes.cast" );
783780 Value *Trunc = IRB.CreateTrunc (AsScalar, ShortScalarTy, Name + " .trunc" );
784781 return IRB.CreateBitCast (Trunc, OrigType, Name + " .orig" );
@@ -791,6 +788,8 @@ Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(Type *LegalType) {
791788 if (!VT)
792789 return LegalType;
793790 Type *ET = VT->getElementType ();
791+ // Explicitly return the element type of 1-element vectors because the
792+ // underlying intrinsics don't like <1 x T> even though it's a synonym for T.
794793 if (VT->getNumElements () == 1 )
795794 return ET;
796795 if (DL.getTypeSizeInBits (LegalType) == 96 && DL.getTypeSizeInBits (ET) < 32 )
@@ -917,9 +916,8 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
917916 if (auto *AT = dyn_cast<ArrayType>(PartType)) {
918917 Type *ElemTy = AT->getElementType ();
919918 TypeSize AllocSize = DL.getTypeAllocSize (ElemTy);
920- if (!(ElemTy->isSingleValueType () &&
921- DL.getTypeSizeInBits (ElemTy) == 8 * AllocSize &&
922- !ElemTy->isVectorTy ())) {
919+ if (!ElemTy->isSingleValueType () ||
920+ DL.getTypeSizeInBits (ElemTy) != 8 * AllocSize || ElemTy->isVectorTy ()) {
923921 bool Changed = false ;
924922 for (auto I : llvm::iota_range<uint32_t >(0 , AT->getNumElements (),
925923 /* Inclusive=*/ false )) {
@@ -963,9 +961,7 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
963961 // type will be a vector (ex. an i256 load will have LegalType = <8 x i32>).
964962 // But if we're already a scalar (which can happen if we're splitting up a
965963 // struct), the element type will be the legal type itself.
966- Type *ElemType = LegalType;
967- if (auto *VT = dyn_cast<FixedVectorType>(LegalType))
968- ElemType = VT->getElementType ();
964+ Type *ElemType = LegalType->getScalarType ();
969965 unsigned ElemBytes = DL.getTypeStoreSize (ElemType);
970966 AAMDNodes AANodes = OrigLI.getAAMetadata ();
971967 if (IsAggPart && Slices.empty ())
@@ -986,10 +982,8 @@ bool LegalizeBufferContentTypesVisitor::visitLoadImpl(
986982 copyMetadataForLoad (*NewLI, OrigLI);
987983 NewLI->setAAMetadata (
988984 AANodes.adjustForAccess (ByteOffset, LoadableType, DL));
989- if (OrigLI.isAtomic ())
990- NewLI->setAtomic (OrigLI.getOrdering (), OrigLI.getSyncScopeID ());
991- if (OrigLI.isVolatile ())
992- NewLI->setVolatile (OrigLI.isVolatile ());
985+ NewLI->setAtomic (OrigLI.getOrdering (), OrigLI.getSyncScopeID ());
986+ NewLI->setVolatile (OrigLI.isVolatile ());
993987 Value *Loaded = IRB.CreateBitCast (NewLI, SliceType,
994988 NewLI->getName () + " .from.loadable" );
995989 LoadsRes = insertSlice (LoadsRes, Loaded, S, Name);
@@ -1042,9 +1036,8 @@ std::pair<bool, bool> LegalizeBufferContentTypesVisitor::visitStoreImpl(
10421036 if (auto *AT = dyn_cast<ArrayType>(PartType)) {
10431037 Type *ElemTy = AT->getElementType ();
10441038 TypeSize AllocSize = DL.getTypeAllocSize (ElemTy);
1045- if (!(ElemTy->isSingleValueType () &&
1046- DL.getTypeSizeInBits (ElemTy) == 8 * AllocSize &&
1047- !ElemTy->isVectorTy ())) {
1039+ if (!ElemTy->isSingleValueType () ||
1040+ DL.getTypeSizeInBits (ElemTy) != 8 * AllocSize || ElemTy->isVectorTy ()) {
10481041 bool Changed = false ;
10491042 for (auto I : llvm::iota_range<uint32_t >(0 , AT->getNumElements (),
10501043 /* Inclusive=*/ false )) {
0 commit comments