diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 7db0586386506..f0cd00df23959 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2103,6 +2103,10 @@ static void widenDestArray(CallInst *CI, const unsigned NumBytesToPad, unsigned ElementByteWidth = SourceDataArray->getElementByteSize(); unsigned int TotalBytes = NumBytesToCopy + NumBytesToPad; unsigned NumElementsToCopy = divideCeil(TotalBytes, ElementByteWidth); + // Don't change size if the alloca already has enough elements for the copy. + if (Alloca->getAllocatedType()->getArrayNumElements() >= NumElementsToCopy) + return; + // Update destination array to be word aligned (memcpy(X,...,...)) IRBuilder<> BuildAlloca(Alloca); AllocaInst *NewAlloca = BuildAlloca.CreateAlloca(ArrayType::get( diff --git a/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-large-alloca.ll b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-large-alloca.ll new file mode 100644 index 0000000000000..4fca1ffefdcaf --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/ARM/arm-widen-large-alloca.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=arm-none-eabi -passes=globalopt -S | FileCheck %s + +@.i8 = private unnamed_addr constant [3 x i8] [i8 1, i8 2, i8 3] , align 1 + +define void @memcpy() { +; CHECK-LABEL: define void @memcpy() local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [4 x i8], align 1 +; CHECK-NEXT: [[ALLOCA2:%.*]] = alloca [5 x i8], align 1 +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr nonnull [[ALLOCA1]]) +; CHECK-NEXT: [[CALL2:%.*]] = call i32 @bar(ptr nonnull [[ALLOCA2]]) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) [[ALLOCA1]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(5) 
[[ALLOCA2]], ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 4, i1 false) +; CHECK-NEXT: ret void +; +entry: + %alloca1 = alloca [3 x i8], align 1 + %alloca2 = alloca [5 x i8], align 1 + %call1 = call i32 @bar(ptr nonnull %alloca1) + %call2 = call i32 @bar(ptr nonnull %alloca2) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(3) %alloca1, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(5) %alloca2, ptr noundef nonnull align 1 dereferenceable(3) @.i8, i32 3, i1 false) + ret void +} + +declare i32 @bar(...)