From 783982893eef1b9b417ad1565c0569c2da102f3e Mon Sep 17 00:00:00 2001 From: Jonathan Cogan Date: Wed, 19 Mar 2025 16:46:46 +0000 Subject: [PATCH 1/2] [CodeGen] Avoid aligning alloca size. --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 44 +++++++++++++------ .../SelectionDAG/SelectionDAGBuilder.cpp | 43 +++++++++++------- .../AArch64/GlobalISel/dynamic-alloca.ll | 6 +-- .../CodeGen/AArch64/sme-framelower-use-bp.ll | 6 +-- .../CodeGen/AArch64/stack-probing-dynamic.ll | 14 ++---- llvm/test/CodeGen/AArch64/sve-alloca.ll | 4 +- .../PowerPC/aix-framepointer-save-restore.ll | 9 +--- llvm/test/CodeGen/PowerPC/pr46759.ll | 3 -- .../CodeGen/PowerPC/stack-clash-prologue.ll | 16 ++----- .../RISCV/rvv/stack-probing-dynamic.ll | 12 +---- .../CodeGen/RISCV/stack-clash-prologue.ll | 6 +-- llvm/test/CodeGen/SPARC/alloca-align.ll | 14 +----- llvm/test/CodeGen/SPARC/stack-align.ll | 4 +- llvm/test/CodeGen/SystemZ/alloca-03.ll | 2 +- llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll | 9 ++-- .../CodeGen/VE/Scalar/stackframe_align.ll | 14 +++--- .../test/CodeGen/VE/Scalar/stackframe_call.ll | 8 ---- .../CodeGen/VE/Scalar/stackframe_nocall.ll | 8 ---- llvm/test/CodeGen/X86/clobber_base_ptr.ll | 4 -- llvm/test/CodeGen/X86/pr50782.ll | 3 +- .../stack-clash-small-alloc-medium-align.ll | 3 +- .../CodeGen/X86/win64_alloca_dynalloca.ll | 18 +++----- 22 files changed, 98 insertions(+), 152 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index b85239ebf08cb..d55096af8836c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U, getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. 
This doesn't overflow because we're computing - // an address inside an alloca. - Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign(); - auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1); - auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, - MachineInstr::NoUWrap); - auto AlignCst = - MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1)); - auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + Align StackAlign = TFI->getStackAlign(); Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty)); - if (Alignment <= StackAlign) + + // If the stack alignment is at least as strict as the alloca's alignment, + // ignore the alloca's alignment. We will align the size of the alloca to the + // stack alignment, which guarantees that the alloca's alignment is satisfied. + bool IsUnderAligned = Alignment <= StackAlign; + if (IsUnderAligned) Alignment = Align(1); - MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment); + + // If the stack grows up, adding the alloca's size to SP without padding may + // leave SP not aligned (to the stack alignment) after the alloca because we + // align SP (to the stack align or alloca align) *before* adding the alloca + // size. On the other hand, if the stack grows down, we will align SP *after* + // decrementing it, so there is no need to pad the size. + if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp || + IsUnderAligned) { + // Round the size of the allocation up to the stack alignment size + // by adding SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. 
+ auto SAMinusOne = + MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); + + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment); + } else { + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment); + } MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 14bb1d943d2d6..fe1a8bab30e54 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr)); } - // Handle alignment. If the requested alignment is less than or equal to - // the stack alignment, ignore it. If the size is greater than or equal to - // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); - if (*Alignment <= StackAlign) + // Handle alignment. If the requested alignment is less than or equal to the + // stack alignment, ignore it since we will align the size. If the size is + // greater than or equal to the stack alignment, we note this in the + // DYNAMIC_STACKALLOC node. 
+ const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering(); + Align StackAlign = TFI->getStackAlign(); + bool IsUnderAligned = *Alignment <= StackAlign; + if (IsUnderAligned) Alignment = std::nullopt; - const uint64_t StackAlignMask = StackAlign.value() - 1U; - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(StackAlignMask, dl, IntPtr), - SDNodeFlags::NoUnsignedWrap); - - // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getSignedConstant(~StackAlignMask, dl, IntPtr)); + // If the stack grows up, adding the alloca's size to SP without padding may + // leave SP not aligned (to the stack alignment) after the alloca because we + // align SP (to the stack align or alloca align) *before* adding the alloca + // size. On the other hand, if the stack grows down, we will align SP *after* + // decrementing it, so there is no need to align the size. + if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp || + IsUnderAligned) { + const uint64_t StackAlignMask = StackAlign.value() - 1U; + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(StackAlignMask, dl, IntPtr), + SDNodeFlags::NoUnsignedWrap); + + // Mask out the low bits for alignment purposes. 
+ AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getSignedConstant(~StackAlignMask, dl, IntPtr)); + } SDValue Ops[] = { getRoot(), AllocSize, diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll index 88eaa1382d1d6..0f74b51262339 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll @@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) { ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]] - ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32 + ; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32 ; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0) ; CHECK: RET_ReallyLR implicit $x0 %addr = alloca i8, i32 %numelts, align 32 diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll index f49bb910b5bd1..85c6b2a2fb854 100644 --- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll +++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll @@ -160,11 +160,7 @@ define void @quux() #1 { ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: subs x9, x9, #16 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: addvl x9, x8, #2 -; CHECK-NEXT: mov w0, w9 -; CHECK-NEXT: // implicit-def: $x9 -; CHECK-NEXT: mov w9, w0 -; CHECK-NEXT: and x9, x9, #0x7f0 +; CHECK-NEXT: rdvl x9, #2 ; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0 diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll 
b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll index e7687f0d3994b..b32eb210ce0e7 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll @@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: sub x9, sp, #32 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0 -; CHECK-NEXT: add x9, x0, #15 ; CHECK-NEXT: mov x8, sp -; CHECK-DAG: str xzr, [sp] -; CHECK-DAG: and x9, x9, #0xfffffffffffffff0 -; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-NEXT: str xzr, [sp] ; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x9 +; CHECK-DAG: sub x8, x8, x0 ; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0 ; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 @@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_3: ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: add x9, x0, #15 ; CHECK-NEXT: mov x8, sp -; CHECK-DAG: ldr xzr, [sp] -; CHECK-DAG: and x9, x9, #0xfffffffffffffff0 -; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-NEXT: ldr xzr, [sp] ; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x9 +; CHECK-DAG: sub x8, x8, x0 ; CHECK-NEXT: and x8, x8, #0xffffffffffffe000 ; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll index 2520095cce62e..3d3c5da483015 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -54,10 +54,8 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG ; CHECK-NEXT: 
.cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG -; CHECK-NEXT: rdvl x9, #2 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, x9, #15 -; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: cnth x9, all, mul #4 ; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0 ; CHECK-NEXT: mov sp, x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll index 44281bcc3647d..8895a9a920569 100644 --- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll +++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll @@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) { ; AIX32-NEXT: slwi 3, 3, 2 ; AIX32-NEXT: lwz 4, 0(1) ; AIX32-NEXT: li 5, -64 -; AIX32-NEXT: addi 3, 3, 15 -; AIX32-NEXT: mr 31, 1 -; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27 ; AIX32-NEXT: neg 3, 3 +; AIX32-NEXT: mr 31, 1 ; AIX32-NEXT: and 5, 3, 5 ; AIX32-NEXT: stwux 4, 1, 5 ; AIX32-NEXT: addi 3, 1, 64 @@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) { ; AIX64-NEXT: rldic 3, 3, 2, 30 ; AIX64-NEXT: ld 4, 0(1) ; AIX64-NEXT: li 5, -64 -; AIX64-NEXT: addi 3, 3, 15 -; AIX64-NEXT: mr 31, 1 -; AIX64-NEXT: rldicl 3, 3, 60, 4 -; AIX64-NEXT: rldicl 3, 3, 4, 29 ; AIX64-NEXT: neg 3, 3 +; AIX64-NEXT: mr 31, 1 ; AIX64-NEXT: and 5, 3, 5 ; AIX64-NEXT: stdux 4, 1, 5 ; AIX64-NEXT: addi 3, 1, 128 diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index d1129b1825aee..8653ca997c4ed 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-NEXT: li r4, -2048 ; CHECK-LE-NEXT: li r6, -4096 ; CHECK-LE-NEXT: mr r31, r1 -; CHECK-LE-NEXT: addi r3, r3, 15 -; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 -; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 ; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: ld r3, 0(r1) ; 
CHECK-LE-NEXT: and r4, r5, r4 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll index b887bbb8c99f8..15d0cd5e9baee 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 -; CHECK-LE-NEXT: clrldi r3, r3, 32 ; CHECK-LE-NEXT: lis r5, 1 ; CHECK-LE-NEXT: mr r31, r1 -; CHECK-LE-NEXT: li r6, 1 ; CHECK-LE-NEXT: sldi r4, r4, 2 -; CHECK-LE-NEXT: addi r3, r3, 15 +; CHECK-LE-NEXT: li r6, 1 +; CHECK-LE-NEXT: clrldi r3, r3, 32 ; CHECK-LE-NEXT: ori r5, r5, 0 -; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-LE-NEXT: add r5, r31, r5 -; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 ; CHECK-LE-NEXT: stwx r6, r5, r4 +; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: li r4, -32768 ; CHECK-LE-NEXT: li r6, -4096 -; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: ld r3, 0(r1) ; CHECK-LE-NEXT: and r4, r5, r4 ; CHECK-LE-NEXT: mr r5, r4 @@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 -; CHECK-BE-NEXT: clrldi r3, r3, 32 ; CHECK-BE-NEXT: lis r5, 1 -; CHECK-BE-NEXT: addi r3, r3, 15 ; CHECK-BE-NEXT: mr r31, r1 ; CHECK-BE-NEXT: ori r5, r5, 0 -; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-BE-NEXT: add r5, r31, r5 ; CHECK-BE-NEXT: sldi r4, r4, 2 ; CHECK-BE-NEXT: li r6, 1 -; CHECK-BE-NEXT: rldicl r3, r3, 4, 31 +; CHECK-BE-NEXT: clrldi r3, r3, 32 ; CHECK-BE-NEXT: stwx r6, r5, r4 ; CHECK-BE-NEXT: neg r7, r3 ; CHECK-BE-NEXT: li r4, -32768 @@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-32-NEXT: lis r4, 1 ; CHECK-32-NEXT: mr r31, r1 ; CHECK-32-NEXT: ori r4, r4, 0 -; CHECK-32-NEXT: addi r3, r3, 15 ; CHECK-32-NEXT: add r4, r31, r4 ; CHECK-32-NEXT: li r5, 1 ; 
CHECK-32-NEXT: slwi r6, r6, 2 -; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 ; CHECK-32-NEXT: neg r7, r3 ; CHECK-32-NEXT: stwx r5, r4, r6 ; CHECK-32-NEXT: li r4, -32768 diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll index c3c1643e6de01..07daca9c7851e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll @@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; RV64I-NEXT: .cfi_def_cfa s0, 0 ; RV64I-NEXT: andi sp, sp, -64 ; RV64I-NEXT: mv s1, sp -; RV64I-NEXT: addi a0, a0, 15 -; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: andi a0, a0, -64 ; RV64I-NEXT: lui a2, 1 @@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; RV32I-NEXT: .cfi_def_cfa s0, 0 ; RV32I-NEXT: andi sp, sp, -64 ; RV32I-NEXT: mv s1, sp -; RV32I-NEXT: addi a0, a0, 15 -; RV32I-NEXT: andi a0, a0, -16 ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: andi a0, a0, -64 ; RV32I-NEXT: lui a1, 1 @@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; RV64I-NEXT: srli a2, sp, 13 ; RV64I-NEXT: slli sp, a2, 13 ; RV64I-NEXT: mv s1, sp -; RV64I-NEXT: addi a0, a0, 15 -; RV64I-NEXT: lui a2, 1048574 -; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: lui a2, 1048574 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: lui a2, 1 ; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; RV32I-NEXT: srli a1, sp, 13 ; RV32I-NEXT: slli sp, a1, 13 ; RV32I-NEXT: mv s1, sp -; RV32I-NEXT: addi a0, a0, 15 -; RV32I-NEXT: lui a1, 1048574 -; RV32I-NEXT: andi a0, a0, -16 ; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: lui a1, 1048574 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: lui a1, 1 ; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll 
b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll index b1c0755c36ec1..70c082026bda8 100644 --- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll @@ -642,8 +642,6 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: sw a2, 0(a1) -; RV64I-NEXT: addi a0, a0, 15 -; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: andi a0, a0, -2048 ; RV64I-NEXT: lui a1, 1 @@ -698,11 +696,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; RV32I-NEXT: add a2, s1, a2 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: addi a0, a0, 15 -; RV32I-NEXT: andi a0, a0, -16 -; RV32I-NEXT: sw a2, 0(a1) ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: andi a0, a0, -2048 +; RV32I-NEXT: sw a2, 0(a1) ; RV32I-NEXT: lui a1, 1 ; RV32I-NEXT: .LBB11_3: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: sub sp, sp, a1 diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll index a3dcc3779f4e6..3c469ee2f3a59 100644 --- a/llvm/test/CodeGen/SPARC/alloca-align.ll +++ b/llvm/test/CodeGen/SPARC/alloca-align.ll @@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind { ; CHECK32-LABEL: variable_alloca_with_overalignment: ; CHECK32: ! %bb.0: ; CHECK32-NEXT: save %sp, -96, %sp -; CHECK32-NEXT: add %sp, 80, %i1 +; CHECK32-NEXT: add %sp, 84, %i1 ; CHECK32-NEXT: and %i1, -64, %o0 ; CHECK32-NEXT: add %o0, -96, %sp ; CHECK32-NEXT: add %i0, 7, %i0 @@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind { ; CHECK64-LABEL: variable_alloca_with_overalignment: ; CHECK64: ! 
%bb.0: ; CHECK64-NEXT: save %sp, -128, %sp -; CHECK64-NEXT: add %sp, 2159, %i1 +; CHECK64-NEXT: add %sp, 2171, %i1 ; CHECK64-NEXT: and %i1, -64, %o0 ; CHECK64-NEXT: add %o0, -2175, %sp ; CHECK64-NEXT: srl %i0, 0, %i0 @@ -52,8 +52,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind { ; CHECK32-LABEL: variable_alloca_with_overalignment_2: ; CHECK32: ! %bb.0: ; CHECK32-NEXT: save %sp, -96, %sp -; CHECK32-NEXT: add %i0, 7, %i0 -; CHECK32-NEXT: and %i0, -8, %i0 ; CHECK32-NEXT: sub %sp, %i0, %i0 ; CHECK32-NEXT: add %i0, 88, %i0 ; CHECK32-NEXT: and %i0, -64, %o1 @@ -67,14 +65,6 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind { ; CHECK64: ! %bb.0: ; CHECK64-NEXT: save %sp, -128, %sp ; CHECK64-NEXT: srl %i0, 0, %i0 -; CHECK64-NEXT: add %i0, 15, %i0 -; CHECK64-NEXT: sethi 4194303, %i1 -; CHECK64-NEXT: or %i1, 1008, %i1 -; CHECK64-NEXT: sethi 0, %i2 -; CHECK64-NEXT: or %i2, 1, %i2 -; CHECK64-NEXT: sllx %i2, 32, %i2 -; CHECK64-NEXT: or %i2, %i1, %i1 -; CHECK64-NEXT: and %i0, %i1, %i0 ; CHECK64-NEXT: sub %sp, %i0, %i0 ; CHECK64-NEXT: add %i0, 2175, %i0 ; CHECK64-NEXT: and %i0, -64, %o1 diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll index 18bb052b47d97..fffec42c2bbb9 100644 --- a/llvm/test/CodeGen/SPARC/stack-align.ll +++ b/llvm/test/CodeGen/SPARC/stack-align.ll @@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 % ; CHECK32: ! %bb.0: ! %entry ; CHECK32-NEXT: save %sp, -96, %sp ; CHECK32-NEXT: ld [%fp+92], %o0 -; CHECK32-NEXT: add %sp, 80, %i0 +; CHECK32-NEXT: add %sp, 84, %i0 ; CHECK32-NEXT: and %i0, -64, %o1 ; CHECK32-NEXT: call stack_realign_helper ; CHECK32-NEXT: add %o1, -96, %sp @@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 % ; CHECK64-LABEL: stack_realign: ; CHECK64: ! %bb.0: ! 
%entry ; CHECK64-NEXT: save %sp, -128, %sp -; CHECK64-NEXT: add %sp, 2159, %i0 +; CHECK64-NEXT: add %sp, 2171, %i0 ; CHECK64-NEXT: and %i0, -64, %o1 ; CHECK64-NEXT: add %o1, -2175, %sp ; CHECK64-NEXT: add %sp, -48, %sp diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll index e331bfbfb8ab3..3ad687df50295 100644 --- a/llvm/test/CodeGen/SystemZ/alloca-03.ll +++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll @@ -114,7 +114,7 @@ define void @f5() { ; CHECK-NEXT: lgr %r11, %r15 ; CHECK-NEXT: .cfi_def_cfa_register %r11 ; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: aghi %r1, -128 +; CHECK-NEXT: aghi %r1, -124 ; CHECK-NEXT: la %r2, 280(%r1) ; CHECK-NEXT: nill %r2, 65408 ; CHECK-NEXT: lgr %r15, %r1 diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll index a1002c540c14f..99c4a9fcdd8b1 100644 --- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll +++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll @@ -7,12 +7,11 @@ define void @test(i64 %n) { ; CHECK-LABEL: test: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: lea %s0, 15(, %s0) -; CHECK-NEXT: and %s0, -16, %s0 -; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo -; CHECK-NEXT: and %s1, %s1, (32)0 -; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) +; CHECK-NEXT: lea %s0, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s0) ; CHECK-NEXT: or %s1, -32, (0)1 +; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: lea %s0, 240(, %s11) ; CHECK-NEXT: lea %s0, 31(, %s0) diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll index d90c0bcf9f837..4121e9507bad3 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll @@ -407,10 +407,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) { ; CHECK-NEXT: monc ; 
CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: ld1b.zx %s0, (, %s0) -; CHECK-NEXT: st1b %s0, 272(, %s17) -; CHECK-NEXT: lea %s0, 15(, %s1) -; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: ld1b.zx %s2, (, %s0) +; CHECK-NEXT: or %s0, 0, %s1 +; CHECK-NEXT: st1b %s2, 272(, %s17) ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -447,10 +446,9 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) { ; CHECKFP-NEXT: monc ; CHECKFP-NEXT: or %s0, 0, %s62 ; CHECKFP-NEXT: .LBB6_2: -; CHECKFP-NEXT: ld1b.zx %s0, (, %s0) -; CHECKFP-NEXT: st1b %s0, 272(, %s17) -; CHECKFP-NEXT: lea %s0, 15(, %s1) -; CHECKFP-NEXT: and %s0, -16, %s0 +; CHECKFP-NEXT: ld1b.zx %s2, (, %s0) +; CHECKFP-NEXT: or %s0, 0, %s1 +; CHECKFP-NEXT: st1b %s2, 272(, %s17) ; CHECKFP-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECKFP-NEXT: and %s1, %s1, (32)0 ; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll index 3a3b1ba1544c4..02a1298141265 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll @@ -180,8 +180,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) { ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: lea %s0, 15(, %s0) -; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -227,8 +225,6 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) { ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, 15(, %s0) -; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; 
PIC-NEXT: sic %s16 @@ -447,8 +443,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) { ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: lea %s0, 15(, %s0) -; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -499,8 +493,6 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) { ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, 15(, %s0) -; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll index f9308a172ad05..20affad0fc7bd 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll @@ -103,8 +103,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: or %s2, 0, %s1 -; CHECK-NEXT: lea %s0, 15(, %s0) -; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -149,8 +147,6 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, 15(, %s0) -; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 @@ -300,8 +296,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: 
or %s2, 0, %s1 -; CHECK-NEXT: lea %s0, 15(, %s0) -; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -346,8 +340,6 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) -; PIC-NEXT: lea %s0, 15(, %s0) -; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll index 2c39560f02d16..2bd1c69bc521d 100644 --- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll +++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll @@ -25,8 +25,6 @@ define i32 @clober_bp() { ; CHECK-NEXT: .cfi_offset %edi, -12 ; CHECK-NEXT: movl $4, 12(%esi) ; CHECK-NEXT: movl 12(%esi), %eax -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax @@ -78,8 +76,6 @@ define i32 @clobber_bpfp() { ; CHECK-NEXT: .cfi_offset %edi, -12 ; CHECK-NEXT: movl $4, 12(%esi) ; CHECK-NEXT: movl 12(%esi), %eax -; CHECK-NEXT: addl $3, %eax -; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll index 591a33446d4e3..0cdbd5721e6b1 100644 --- a/llvm/test/CodeGen/X86/pr50782.ll +++ b/llvm/test/CodeGen/X86/pr50782.ll @@ -25,8 +25,7 @@ define void @h(float %i) { ; CHECK-NEXT: .cfi_offset %esi, -12 ; CHECK-NEXT: flds 8(%ebp) ; CHECK-NEXT: movl _a, %ecx -; CHECK-NEXT: leal 3(%ecx), %eax -; CHECK-NEXT: andl $-4, %eax +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax diff --git 
a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll index ccf7e1d56da90..dc2503ecece91 100644 --- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -99,8 +99,7 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 { ; CHECK-NEXT: movl $1, (%rbx,%rdi,4) ; CHECK-NEXT: movl (%rbx), %ecx ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: leaq 15(,%rcx,4), %rcx -; CHECK-NEXT: andq $-16, %rcx +; CHECK-NEXT: shlq $2, %rcx ; CHECK-NEXT: subq %rcx, %rax ; CHECK-NEXT: cmpq %rsp, %rax ; CHECK-NEXT: jge .LBB3_3 diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll index 241188b8cc3d5..d636896467b00 100644 --- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -95,28 +95,24 @@ entry: %buf1 = alloca i8, i64 %n, align 128 -; M64: leaq 15(%{{.*}}), %rax -; M64: andq $-16, %rax +; M64: movq %rcx, %rax ; M64: callq ___chkstk_ms ; M64: subq %rax, %rsp ; M64: movq %rsp, [[R2:%r.*]] ; M64: andq $-128, [[R2]] ; M64: movq [[R2]], %rsp -; W64: leaq 15(%{{.*}}), %rax -; W64: andq $-16, %rax +; W64: movq %rcx, %rax ; W64: callq __chkstk ; W64: subq %rax, %rsp ; W64: movq %rsp, [[R2:%r.*]] ; W64: andq $-128, [[R2]] ; W64: movq [[R2]], %rsp -; EFI: leaq 15(%{{.*}}), [[R1:%r.*]] -; EFI: andq $-16, [[R1]] -; EFI: movq %rsp, [[R64:%r.*]] -; EFI: subq [[R1]], [[R64]] -; EFI: andq $-128, [[R64]] -; EFI: movq [[R64]], %rsp +; EFI: movq %rsp, %rax +; EFI: subq %rcx, %rax +; EFI: andq $-128, %rax +; EFI: movq %rax, %rsp %r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind @@ -129,7 +125,7 @@ entry: ; W64: callq bar ; EFI: subq $48, %rsp -; EFI: movq [[R64]], 32(%rsp) +; EFI: movq %rax, 32(%rsp) ; EFI: callq _bar ret i64 %r From 5ca41197b3651b9d30bcd62affc2fadca753abb7 Mon Sep 17 00:00:00 2001 From: Jonathan Cogan Date: 
Mon, 24 Mar 2025 09:18:03 +0000 Subject: [PATCH 2/2] Revert changes to SelectionDAG. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 43 +++++++------------ .../CodeGen/AArch64/sme-framelower-use-bp.ll | 6 ++- .../CodeGen/AArch64/stack-probing-dynamic.ll | 40 ++++++++++++----- llvm/test/CodeGen/AArch64/sve-alloca.ll | 4 +- .../PowerPC/aix-framepointer-save-restore.ll | 9 +++- llvm/test/CodeGen/PowerPC/pr46759.ll | 3 ++ .../CodeGen/PowerPC/stack-clash-prologue.ll | 16 +++++-- .../RISCV/rvv/stack-probing-dynamic.ll | 12 +++++- .../CodeGen/RISCV/stack-clash-prologue.ll | 6 ++- llvm/test/CodeGen/SPARC/alloca-align.ll | 14 +++++- llvm/test/CodeGen/SPARC/stack-align.ll | 4 +- llvm/test/CodeGen/SystemZ/alloca-03.ll | 2 +- llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll | 9 ++-- .../CodeGen/VE/Scalar/stackframe_align.ll | 14 +++--- .../test/CodeGen/VE/Scalar/stackframe_call.ll | 8 ++++ .../CodeGen/VE/Scalar/stackframe_nocall.ll | 8 ++++ llvm/test/CodeGen/X86/clobber_base_ptr.ll | 4 ++ llvm/test/CodeGen/X86/pr50782.ll | 3 +- .../stack-clash-small-alloc-medium-align.ll | 3 +- .../CodeGen/X86/win64_alloca_dynalloca.ll | 18 +++++--- 20 files changed, 154 insertions(+), 72 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe1a8bab30e54..14bb1d943d2d6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4447,35 +4447,24 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr)); } - // Handle alignment. If the requested alignment is less than or equal to the - // stack alignment, ignore it since we will align the size. If the size is - // greater than or equal to the stack alignment, we note this in the - // DYNAMIC_STACKALLOC node. 
- const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering(); - Align StackAlign = TFI->getStackAlign(); - bool IsUnderAligned = *Alignment <= StackAlign; - if (IsUnderAligned) + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); + if (*Alignment <= StackAlign) Alignment = std::nullopt; - // If the stack grows up, adding the alloca's size to SP without padding may - // leave SP not aligned (to the stack alignment) after the alloca because we - // align SP (to the stack align or alloca align) *before* adding the alloca - // size. On the other hand, if the stack grows down, we will align SP *after* - // decrementing it, so there is no need to align the size. - if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp || - IsUnderAligned) { - const uint64_t StackAlignMask = StackAlign.value() - 1U; - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(StackAlignMask, dl, IntPtr), - SDNodeFlags::NoUnsignedWrap); - - // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getSignedConstant(~StackAlignMask, dl, IntPtr)); - } + const uint64_t StackAlignMask = StackAlign.value() - 1U; + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. 
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(StackAlignMask, dl, IntPtr), + SDNodeFlags::NoUnsignedWrap); + + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getSignedConstant(~StackAlignMask, dl, IntPtr)); SDValue Ops[] = { getRoot(), AllocSize, diff --git a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll index 85c6b2a2fb854..f49bb910b5bd1 100644 --- a/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll +++ b/llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll @@ -160,7 +160,11 @@ define void @quux() #1 { ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: subs x9, x9, #16 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: rdvl x9, #2 +; CHECK-NEXT: addvl x9, x8, #2 +; CHECK-NEXT: mov w0, w9 +; CHECK-NEXT: // implicit-def: $x9 +; CHECK-NEXT: mov w9, w0 +; CHECK-NEXT: and x9, x9, #0x7f0 ; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0 diff --git a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll index b32eb210ce0e7..ee74469c0a76a 100644 --- a/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s +; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 < %s -verify-machineinstrs -global-isel -global-isel-abort=2 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Dynamically-sized allocation, needs a loop which can handle any size at ; runtime. 
The final iteration of the loop will temporarily put SP below the @@ -107,10 +107,20 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; CHECK-NEXT: .cfi_offset w29, -32 ; CHECK-NEXT: sub x9, sp, #32 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: str xzr, [sp] -; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x0 +; +; CHECK-SD-NEXT: add x9, x0, #15 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-DAG: str xzr, [sp] +; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0 +; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-SD-DAG: mov x19, sp +; CHECK-SD-DAG: sub x8, x8, x9 +; +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str xzr, [sp] +; CHECK-GI-DAG: mov x19, sp +; CHECK-GI-DAG: sub x8, x8, x0 +; ; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0 ; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 @@ -164,10 +174,20 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; CHECK-NEXT: b .LBB3_1 ; CHECK-NEXT: .LBB3_3: ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr xzr, [sp] -; CHECK-DAG: mov x19, sp -; CHECK-DAG: sub x8, x8, x0 +; +; CHECK-SD-NEXT: add x9, x0, #15 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-DAG: ldr xzr, [sp] +; CHECK-SD-DAG: and x9, x9, #0xfffffffffffffff0 +; CHECK-SD-NOT: INVALID_TO_BREAK_UP_CHECK_DAG +; CHECK-SD-DAG: mov x19, sp +; CHECK-SD-DAG: sub x8, x8, x9 +; +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: ldr xzr, [sp] +; CHECK-GI-DAG: mov x19, sp +; CHECK-GI-DAG: sub x8, x8, x0 +; ; CHECK-NEXT: and x8, x8, #0xffffffffffffe000 ; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096 diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll index 3d3c5da483015..2520095cce62e 100644 --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -54,8 +54,10 @@ define void @foo( %dst, i1 %cond) { ; CHECK-NEXT: 
.cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG +; CHECK-NEXT: rdvl x9, #2 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: cnth x9, all, mul #4 +; CHECK-NEXT: add x9, x9, #15 +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 ; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0 ; CHECK-NEXT: mov sp, x0 diff --git a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll index 8895a9a920569..44281bcc3647d 100644 --- a/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll +++ b/llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll @@ -82,8 +82,10 @@ define dso_local void @frameptr_realigned(i32 %n) { ; AIX32-NEXT: slwi 3, 3, 2 ; AIX32-NEXT: lwz 4, 0(1) ; AIX32-NEXT: li 5, -64 -; AIX32-NEXT: neg 3, 3 +; AIX32-NEXT: addi 3, 3, 15 ; AIX32-NEXT: mr 31, 1 +; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27 +; AIX32-NEXT: neg 3, 3 ; AIX32-NEXT: and 5, 3, 5 ; AIX32-NEXT: stwux 4, 1, 5 ; AIX32-NEXT: addi 3, 1, 64 @@ -109,8 +111,11 @@ define dso_local void @frameptr_realigned(i32 %n) { ; AIX64-NEXT: rldic 3, 3, 2, 30 ; AIX64-NEXT: ld 4, 0(1) ; AIX64-NEXT: li 5, -64 -; AIX64-NEXT: neg 3, 3 +; AIX64-NEXT: addi 3, 3, 15 ; AIX64-NEXT: mr 31, 1 +; AIX64-NEXT: rldicl 3, 3, 60, 4 +; AIX64-NEXT: rldicl 3, 3, 4, 29 +; AIX64-NEXT: neg 3, 3 ; AIX64-NEXT: and 5, 3, 5 ; AIX64-NEXT: stdux 4, 1, 5 ; AIX64-NEXT: addi 3, 1, 128 diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll index 8653ca997c4ed..d1129b1825aee 100644 --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -33,6 +33,9 @@ define void @foo(i32 %vla_size) #0 { ; 
CHECK-LE-NEXT: li r4, -2048 ; CHECK-LE-NEXT: li r6, -4096 ; CHECK-LE-NEXT: mr r31, r1 +; CHECK-LE-NEXT: addi r3, r3, 15 +; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 +; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 ; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: ld r3, 0(r1) ; CHECK-LE-NEXT: and r4, r5, r4 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll index 15d0cd5e9baee..b887bbb8c99f8 100644 --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -835,17 +835,20 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: clrldi r3, r3, 32 ; CHECK-LE-NEXT: lis r5, 1 ; CHECK-LE-NEXT: mr r31, r1 -; CHECK-LE-NEXT: sldi r4, r4, 2 ; CHECK-LE-NEXT: li r6, 1 -; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: sldi r4, r4, 2 +; CHECK-LE-NEXT: addi r3, r3, 15 ; CHECK-LE-NEXT: ori r5, r5, 0 +; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-LE-NEXT: add r5, r31, r5 +; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 ; CHECK-LE-NEXT: stwx r6, r5, r4 -; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: li r4, -32768 ; CHECK-LE-NEXT: li r6, -4096 +; CHECK-LE-NEXT: neg r5, r3 ; CHECK-LE-NEXT: ld r3, 0(r1) ; CHECK-LE-NEXT: and r4, r5, r4 ; CHECK-LE-NEXT: mr r5, r4 @@ -893,13 +896,16 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: clrldi r3, r3, 32 ; CHECK-BE-NEXT: lis r5, 1 +; CHECK-BE-NEXT: addi r3, r3, 15 ; CHECK-BE-NEXT: mr r31, r1 ; CHECK-BE-NEXT: ori r5, r5, 0 +; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-BE-NEXT: add r5, r31, r5 ; CHECK-BE-NEXT: sldi r4, r4, 2 ; CHECK-BE-NEXT: li r6, 1 -; CHECK-BE-NEXT: clrldi r3, r3, 32 +; CHECK-BE-NEXT: rldicl r3, r3, 4, 31 ; CHECK-BE-NEXT: stwx r6, r5, r4 ; CHECK-BE-NEXT: neg r7, r3 ; 
CHECK-BE-NEXT: li r4, -32768 @@ -958,9 +964,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-32-NEXT: lis r4, 1 ; CHECK-32-NEXT: mr r31, r1 ; CHECK-32-NEXT: ori r4, r4, 0 +; CHECK-32-NEXT: addi r3, r3, 15 ; CHECK-32-NEXT: add r4, r31, r4 ; CHECK-32-NEXT: li r5, 1 ; CHECK-32-NEXT: slwi r6, r6, 2 +; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 ; CHECK-32-NEXT: neg r7, r3 ; CHECK-32-NEXT: stwx r5, r4, r6 ; CHECK-32-NEXT: li r4, -32768 diff --git a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll index 07daca9c7851e..c3c1643e6de01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll @@ -180,6 +180,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; RV64I-NEXT: .cfi_def_cfa s0, 0 ; RV64I-NEXT: andi sp, sp, -64 ; RV64I-NEXT: mv s1, sp +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: andi a0, a0, -64 ; RV64I-NEXT: lui a2, 1 @@ -217,6 +219,8 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 { ; RV32I-NEXT: .cfi_def_cfa s0, 0 ; RV32I-NEXT: andi sp, sp, -64 ; RV32I-NEXT: mv s1, sp +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: andi a0, a0, -64 ; RV32I-NEXT: lui a1, 1 @@ -274,8 +278,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; RV64I-NEXT: srli a2, sp, 13 ; RV64I-NEXT: slli sp, a2, 13 ; RV64I-NEXT: mv s1, sp -; RV64I-NEXT: sub a0, sp, a0 +; RV64I-NEXT: addi a0, a0, 15 ; RV64I-NEXT: lui a2, 1048574 +; RV64I-NEXT: andi a0, a0, -16 +; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: and a0, a0, a2 ; RV64I-NEXT: lui a2, 1 ; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 @@ -323,8 +329,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 { ; RV32I-NEXT: srli a1, sp, 13 ; RV32I-NEXT: slli sp, a1, 13 ; RV32I-NEXT: mv s1, sp -; RV32I-NEXT: sub a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 15 ; 
RV32I-NEXT: lui a1, 1048574 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: lui a1, 1 ; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll index 70c082026bda8..b1c0755c36ec1 100644 --- a/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/RISCV/stack-clash-prologue.ll @@ -642,6 +642,8 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: sw a2, 0(a1) +; RV64I-NEXT: addi a0, a0, 15 +; RV64I-NEXT: andi a0, a0, -16 ; RV64I-NEXT: sub a0, sp, a0 ; RV64I-NEXT: andi a0, a0, -2048 ; RV64I-NEXT: lui a1, 1 @@ -696,9 +698,11 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; RV32I-NEXT: add a2, s1, a2 ; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: li a2, 1 +; RV32I-NEXT: addi a0, a0, 15 +; RV32I-NEXT: andi a0, a0, -16 +; RV32I-NEXT: sw a2, 0(a1) ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: andi a0, a0, -2048 -; RV32I-NEXT: sw a2, 0(a1) ; RV32I-NEXT: lui a1, 1 ; RV32I-NEXT: .LBB11_3: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: sub sp, sp, a1 diff --git a/llvm/test/CodeGen/SPARC/alloca-align.ll b/llvm/test/CodeGen/SPARC/alloca-align.ll index 3c469ee2f3a59..a3dcc3779f4e6 100644 --- a/llvm/test/CodeGen/SPARC/alloca-align.ll +++ b/llvm/test/CodeGen/SPARC/alloca-align.ll @@ -6,7 +6,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind { ; CHECK32-LABEL: variable_alloca_with_overalignment: ; CHECK32: ! %bb.0: ; CHECK32-NEXT: save %sp, -96, %sp -; CHECK32-NEXT: add %sp, 84, %i1 +; CHECK32-NEXT: add %sp, 80, %i1 ; CHECK32-NEXT: and %i1, -64, %o0 ; CHECK32-NEXT: add %o0, -96, %sp ; CHECK32-NEXT: add %i0, 7, %i0 @@ -21,7 +21,7 @@ define void @variable_alloca_with_overalignment(i32 %num) nounwind { ; CHECK64-LABEL: variable_alloca_with_overalignment: ; CHECK64: ! 
%bb.0: ; CHECK64-NEXT: save %sp, -128, %sp -; CHECK64-NEXT: add %sp, 2171, %i1 +; CHECK64-NEXT: add %sp, 2159, %i1 ; CHECK64-NEXT: and %i1, -64, %o0 ; CHECK64-NEXT: add %o0, -2175, %sp ; CHECK64-NEXT: srl %i0, 0, %i0 @@ -52,6 +52,8 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind { ; CHECK32-LABEL: variable_alloca_with_overalignment_2: ; CHECK32: ! %bb.0: ; CHECK32-NEXT: save %sp, -96, %sp +; CHECK32-NEXT: add %i0, 7, %i0 +; CHECK32-NEXT: and %i0, -8, %i0 ; CHECK32-NEXT: sub %sp, %i0, %i0 ; CHECK32-NEXT: add %i0, 88, %i0 ; CHECK32-NEXT: and %i0, -64, %o1 @@ -65,6 +67,14 @@ define void @variable_alloca_with_overalignment_2(i32 %num) nounwind { ; CHECK64: ! %bb.0: ; CHECK64-NEXT: save %sp, -128, %sp ; CHECK64-NEXT: srl %i0, 0, %i0 +; CHECK64-NEXT: add %i0, 15, %i0 +; CHECK64-NEXT: sethi 4194303, %i1 +; CHECK64-NEXT: or %i1, 1008, %i1 +; CHECK64-NEXT: sethi 0, %i2 +; CHECK64-NEXT: or %i2, 1, %i2 +; CHECK64-NEXT: sllx %i2, 32, %i2 +; CHECK64-NEXT: or %i2, %i1, %i1 +; CHECK64-NEXT: and %i0, %i1, %i0 ; CHECK64-NEXT: sub %sp, %i0, %i0 ; CHECK64-NEXT: add %i0, 2175, %i0 ; CHECK64-NEXT: and %i0, -64, %o1 diff --git a/llvm/test/CodeGen/SPARC/stack-align.ll b/llvm/test/CodeGen/SPARC/stack-align.ll index fffec42c2bbb9..18bb052b47d97 100644 --- a/llvm/test/CodeGen/SPARC/stack-align.ll +++ b/llvm/test/CodeGen/SPARC/stack-align.ll @@ -13,7 +13,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 % ; CHECK32: ! %bb.0: ! %entry ; CHECK32-NEXT: save %sp, -96, %sp ; CHECK32-NEXT: ld [%fp+92], %o0 -; CHECK32-NEXT: add %sp, 84, %i0 +; CHECK32-NEXT: add %sp, 80, %i0 ; CHECK32-NEXT: and %i0, -64, %o1 ; CHECK32-NEXT: call stack_realign_helper ; CHECK32-NEXT: add %o1, -96, %sp @@ -23,7 +23,7 @@ define void @stack_realign(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 % ; CHECK64-LABEL: stack_realign: ; CHECK64: ! %bb.0: ! 
%entry ; CHECK64-NEXT: save %sp, -128, %sp -; CHECK64-NEXT: add %sp, 2171, %i0 +; CHECK64-NEXT: add %sp, 2159, %i0 ; CHECK64-NEXT: and %i0, -64, %o1 ; CHECK64-NEXT: add %o1, -2175, %sp ; CHECK64-NEXT: add %sp, -48, %sp diff --git a/llvm/test/CodeGen/SystemZ/alloca-03.ll b/llvm/test/CodeGen/SystemZ/alloca-03.ll index 3ad687df50295..e331bfbfb8ab3 100644 --- a/llvm/test/CodeGen/SystemZ/alloca-03.ll +++ b/llvm/test/CodeGen/SystemZ/alloca-03.ll @@ -114,7 +114,7 @@ define void @f5() { ; CHECK-NEXT: lgr %r11, %r15 ; CHECK-NEXT: .cfi_def_cfa_register %r11 ; CHECK-NEXT: lgr %r1, %r15 -; CHECK-NEXT: aghi %r1, -124 +; CHECK-NEXT: aghi %r1, -128 ; CHECK-NEXT: la %r2, 280(%r1) ; CHECK-NEXT: nill %r2, 65408 ; CHECK-NEXT: lgr %r15, %r1 diff --git a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll index 99c4a9fcdd8b1..a1002c540c14f 100644 --- a/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll +++ b/llvm/test/CodeGen/VE/Scalar/alloca_aligned.ll @@ -7,11 +7,12 @@ define void @test(i64 %n) { ; CHECK-LABEL: test: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: or %s2, 0, %s0 -; CHECK-NEXT: lea %s0, __ve_grow_stack_align@lo -; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s0) +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 +; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) ; CHECK-NEXT: or %s1, -32, (0)1 -; CHECK-NEXT: or %s0, 0, %s2 ; CHECK-NEXT: bsic %s10, (, %s12) ; CHECK-NEXT: lea %s0, 240(, %s11) ; CHECK-NEXT: lea %s0, 31(, %s0) diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll index 4121e9507bad3..d90c0bcf9f837 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_align.ll @@ -407,9 +407,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) { ; CHECK-NEXT: monc ; 
CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB6_2: -; CHECK-NEXT: ld1b.zx %s2, (, %s0) -; CHECK-NEXT: or %s0, 0, %s1 -; CHECK-NEXT: st1b %s2, 272(, %s17) +; CHECK-NEXT: ld1b.zx %s0, (, %s0) +; CHECK-NEXT: st1b %s0, 272(, %s17) +; CHECK-NEXT: lea %s0, 15(, %s1) +; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -446,9 +447,10 @@ define ptr @test_frame16_align16_dynalign32(ptr %0, i64 %n) { ; CHECKFP-NEXT: monc ; CHECKFP-NEXT: or %s0, 0, %s62 ; CHECKFP-NEXT: .LBB6_2: -; CHECKFP-NEXT: ld1b.zx %s2, (, %s0) -; CHECKFP-NEXT: or %s0, 0, %s1 -; CHECKFP-NEXT: st1b %s2, 272(, %s17) +; CHECKFP-NEXT: ld1b.zx %s0, (, %s0) +; CHECKFP-NEXT: st1b %s0, 272(, %s17) +; CHECKFP-NEXT: lea %s0, 15(, %s1) +; CHECKFP-NEXT: and %s0, -16, %s0 ; CHECKFP-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECKFP-NEXT: and %s1, %s1, (32)0 ; CHECKFP-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll index 02a1298141265..3a3b1ba1544c4 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_call.ll @@ -180,6 +180,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) { ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -225,6 +227,8 @@ define ptr @test_align32(i32 signext %0, ptr nocapture readnone %1) { ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; 
PIC-NEXT: sic %s16 @@ -443,6 +447,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) { ; CHECK-NEXT: monc ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -493,6 +499,8 @@ define ptr @test_align32_var(i32 signext %0, ptr nocapture readnone %1) { ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 diff --git a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll index 20affad0fc7bd..f9308a172ad05 100644 --- a/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll +++ b/llvm/test/CodeGen/VE/Scalar/stackframe_nocall.ll @@ -103,6 +103,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: or %s2, 0, %s1 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -147,6 +149,8 @@ define noalias nonnull ptr @test_align32(i32 signext %0, ptr nocapture readonly ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 @@ -296,6 +300,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado ; CHECK-NEXT: or %s0, 0, %s62 ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: 
or %s2, 0, %s1 +; CHECK-NEXT: lea %s0, 15(, %s0) +; CHECK-NEXT: and %s0, -16, %s0 ; CHECK-NEXT: lea %s1, __ve_grow_stack_align@lo ; CHECK-NEXT: and %s1, %s1, (32)0 ; CHECK-NEXT: lea.sl %s12, __ve_grow_stack_align@hi(, %s1) @@ -340,6 +346,8 @@ define noalias nonnull ptr @test_align32_var(i32 signext %0, ptr nocapture reado ; PIC-NEXT: and %s15, %s15, (32)0 ; PIC-NEXT: sic %s16 ; PIC-NEXT: lea.sl %s15, _GLOBAL_OFFSET_TABLE_@pc_hi(%s16, %s15) +; PIC-NEXT: lea %s0, 15(, %s0) +; PIC-NEXT: and %s0, -16, %s0 ; PIC-NEXT: lea %s12, __ve_grow_stack_align@plt_lo(-24) ; PIC-NEXT: and %s12, %s12, (32)0 ; PIC-NEXT: sic %s16 diff --git a/llvm/test/CodeGen/X86/clobber_base_ptr.ll b/llvm/test/CodeGen/X86/clobber_base_ptr.ll index 2bd1c69bc521d..2c39560f02d16 100644 --- a/llvm/test/CodeGen/X86/clobber_base_ptr.ll +++ b/llvm/test/CodeGen/X86/clobber_base_ptr.ll @@ -25,6 +25,8 @@ define i32 @clober_bp() { ; CHECK-NEXT: .cfi_offset %edi, -12 ; CHECK-NEXT: movl $4, 12(%esi) ; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax @@ -76,6 +78,8 @@ define i32 @clobber_bpfp() { ; CHECK-NEXT: .cfi_offset %edi, -12 ; CHECK-NEXT: movl $4, 12(%esi) ; CHECK-NEXT: movl 12(%esi), %eax +; CHECK-NEXT: addl $3, %eax +; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax diff --git a/llvm/test/CodeGen/X86/pr50782.ll b/llvm/test/CodeGen/X86/pr50782.ll index 0cdbd5721e6b1..591a33446d4e3 100644 --- a/llvm/test/CodeGen/X86/pr50782.ll +++ b/llvm/test/CodeGen/X86/pr50782.ll @@ -25,7 +25,8 @@ define void @h(float %i) { ; CHECK-NEXT: .cfi_offset %esi, -12 ; CHECK-NEXT: flds 8(%ebp) ; CHECK-NEXT: movl _a, %ecx -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: leal 3(%ecx), %eax +; CHECK-NEXT: andl $-4, %eax ; CHECK-NEXT: calll __alloca ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: andl $-16, %eax diff --git 
a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll index dc2503ecece91..ccf7e1d56da90 100644 --- a/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll +++ b/llvm/test/CodeGen/X86/stack-clash-small-alloc-medium-align.ll @@ -99,7 +99,8 @@ define i32 @foo4(i64 %i) local_unnamed_addr #0 { ; CHECK-NEXT: movl $1, (%rbx,%rdi,4) ; CHECK-NEXT: movl (%rbx), %ecx ; CHECK-NEXT: movq %rsp, %rax -; CHECK-NEXT: shlq $2, %rcx +; CHECK-NEXT: leaq 15(,%rcx,4), %rcx +; CHECK-NEXT: andq $-16, %rcx ; CHECK-NEXT: subq %rcx, %rax ; CHECK-NEXT: cmpq %rsp, %rax ; CHECK-NEXT: jge .LBB3_3 diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll index d636896467b00..241188b8cc3d5 100644 --- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -95,24 +95,28 @@ entry: %buf1 = alloca i8, i64 %n, align 128 -; M64: movq %rcx, %rax +; M64: leaq 15(%{{.*}}), %rax +; M64: andq $-16, %rax ; M64: callq ___chkstk_ms ; M64: subq %rax, %rsp ; M64: movq %rsp, [[R2:%r.*]] ; M64: andq $-128, [[R2]] ; M64: movq [[R2]], %rsp -; W64: movq %rcx, %rax +; W64: leaq 15(%{{.*}}), %rax +; W64: andq $-16, %rax ; W64: callq __chkstk ; W64: subq %rax, %rsp ; W64: movq %rsp, [[R2:%r.*]] ; W64: andq $-128, [[R2]] ; W64: movq [[R2]], %rsp -; EFI: movq %rsp, %rax -; EFI: subq %rcx, %rax -; EFI: andq $-128, %rax -; EFI: movq %rax, %rsp +; EFI: leaq 15(%{{.*}}), [[R1:%r.*]] +; EFI: andq $-16, [[R1]] +; EFI: movq %rsp, [[R64:%r.*]] +; EFI: subq [[R1]], [[R64]] +; EFI: andq $-128, [[R64]] +; EFI: movq [[R64]], %rsp %r = call i64 @bar(i64 %n, i64 %x, i64 %n, ptr undef, ptr %buf1) nounwind @@ -125,7 +129,7 @@ entry: ; W64: callq bar ; EFI: subq $48, %rsp -; EFI: movq %rax, 32(%rsp) +; EFI: movq [[R64]], 32(%rsp) ; EFI: callq _bar ret i64 %r