diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b85239ebf08cb..d55096af8836c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U, getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign(); - auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1); - auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, - MachineInstr::NoUWrap); - auto AlignCst = - MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1)); - auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + Align StackAlign = TFI->getStackAlign(); Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty)); - if (Alignment <= StackAlign) + + // If the stack alignment is stricter than the alloca's alignment, ignore the + // alloca's alignment. We will align the size of the alloca to the stack + // alignment, which will guarantee that the alloca's alignment is satisfied. + bool IsUnderAligned = Alignment <= StackAlign; + if (IsUnderAligned) Alignment = Align(1); - MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment); + + // If the stack grows up, adding the alloca's size to SP without padding may + // leave SP not aligned (to the stack alignment) after the alloca because we + // align SP (to the stack align or alloca align) *before* adding the alloca + // size. On the other hand, if the stack grows down, we will align SP *after* + // decrementing it, so there is no need to pad the size. + if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp || + IsUnderAligned) { + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + auto SAMinusOne = + MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); + + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment); + } else { + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment); + } MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll index 88eaa1382d1d6..0f74b51262339 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll @@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) { ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32 + ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32 ; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0) ; CHECK: RET_ReallyLR implicit $x0 %addr = alloca i8, i32 %numelts, align 32 diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll index e7687f0d3994b..ee74469c0a76a 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s +; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Dynamically-sized allocation, needs a loop which can handle any size at ; runtime. The final iteration of the loop will temporarily put SP below the @@ -107,13 +107,20 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: sub x9, sp, #32 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-DAG: str xzr, [sp] -; CHECK-DAG: and x9, x9, #0xfffffffffffffff0 -; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG -; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x9 +; +; CHECK-SD-NEXT: add x9, x0, #15 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-DAG: str xzr, [sp] +; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0 +; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-SD-DAG: mov x19, sp +; CHECK-SD-DAG: sub x8, x8, x9 +; +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str xzr, [sp] +; CHECK-GI-DAG: mov x19, sp +; CHECK-GI-DAG: sub x8, x8, x0 +; ; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0 ; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 @@ -167,13 +174,20 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_3: ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: add x9, x0, #15 -; CHECK-NEXT: mov x8, sp -; CHECK-DAG: ldr xzr, [sp] -; CHECK-DAG: and x9, x9, #0xfffffffffffffff0 -; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG -; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x9 +; +; CHECK-SD-NEXT: add x9, x0, #15 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-DAG: ldr xzr, [sp] +; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0 +; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-SD-DAG: mov x19, sp +; CHECK-SD-DAG: sub x8, x8, x9 +; +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: ldr xzr, [sp] +; CHECK-GI-DAG: mov x19, sp +; CHECK-GI-DAG: sub x8, x8, x0 +; ; CHECK-NEXT: and x8, x8, #0xffffffffffffe000 ; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096