diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 45d3d493a9e68..b9d332b590368 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2961,6 +2961,7 @@ class AllocaSliceRewriter : public InstVisitor { isa(NewAI.getAllocatedType()) ? cast(NewAI.getAllocatedType())->getElementType() : Type::getInt8Ty(NewAI.getContext()); + unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy); // Helper to check if a type is // 1. A fixed vector type @@ -2991,10 +2992,17 @@ class AllocaSliceRewriter : public InstVisitor { // Do not handle the case if // 1. The store does not meet the conditions in the helper function // 2. The store is volatile + // 3. The total store size is not a multiple of the allocated element + // type size if (!IsTypeValidForTreeStructuredMerge( SI->getValueOperand()->getType()) || SI->isVolatile()) return std::nullopt; + auto *VecTy = cast(SI->getValueOperand()->getType()); + unsigned NumElts = VecTy->getNumElements(); + unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType()); + if (NumElts * EltSize % AllocatedEltTySize != 0) + return std::nullopt; StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(), SI->getValueOperand()); } else { diff --git a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll index c858d071451e8..ead6e027ed37c 100644 --- a/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll +++ b/llvm/test/Transforms/SROA/vector-promotion-cannot-tree-structure-merge.ll @@ -219,4 +219,18 @@ entry: } +define <1 x i32> @test_store_value_size_not_multiple_of_allocated_element_type_size(<1 x i16> %a, <1 x i16> %b) { +entry: + %alloca = alloca [2 x i16] + + %ptr0 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 0 + store <1 x i16> %a, ptr %ptr0 + + %ptr1 = getelementptr inbounds [2 x i16], ptr %alloca, i32 0, i32 1 + store <1 x i16> %b, ptr %ptr1 + + %result = load <1 x i32>, ptr %alloca + ret <1 x i32> %result +} + declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)