Skip to content

Commit 7839828

Browse files
committed
[CodeGen] Avoid aligning alloca size.
1 parent cb64a36 commit 7839828

22 files changed

+98
-152
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3111,21 +3111,39 @@ bool IRTranslator::translateAlloca(const User &U,
31113111
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
31123112
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
31133113

3114-
// Round the size of the allocation up to the stack alignment size
3115-
// by add SA-1 to the size. This doesn't overflow because we're computing
3116-
// an address inside an alloca.
3117-
Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
3118-
auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
3119-
auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
3120-
MachineInstr::NoUWrap);
3121-
auto AlignCst =
3122-
MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
3123-
auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
3124-
3114+
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
3115+
Align StackAlign = TFI->getStackAlign();
31253116
Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
3126-
if (Alignment <= StackAlign)
3117+
3118+
// If the stack alignment is at least as strict as the alloca's alignment,
3119+
// ignore the alloca's alignment. We will align the size of the alloca to the
3120+
// stack alignment, which guarantees the alloca's alignment is satisfied.
3121+
bool IsUnderAligned = Alignment <= StackAlign;
3122+
if (IsUnderAligned)
31273123
Alignment = Align(1);
3128-
MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
3124+
3125+
// If the stack grows up, adding the alloca's size to SP without padding may
3126+
// leave SP not aligned (to the stack alignment) after the alloca because we
3127+
// align SP (to the stack align or alloca align) *before* adding the alloca
3128+
// size. On the other hand, if the stack grows down, we will align SP *after*
3129+
// decrementing it, so there is no need to pad the size.
3130+
if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
3131+
IsUnderAligned) {
3132+
// Round the size of the allocation up to the stack alignment size
3133+
// by adding SA-1 to the size. This doesn't overflow because we're computing
3134+
// an address inside an alloca.
3135+
auto SAMinusOne =
3136+
MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
3137+
auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
3138+
MachineInstr::NoUWrap);
3139+
auto AlignCst =
3140+
MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
3141+
auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
3142+
3143+
MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
3144+
} else {
3145+
MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AllocSize, Alignment);
3146+
}
31293147

31303148
MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
31313149
assert(MF->getFrameInfo().hasVarSizedObjects());

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4447,24 +4447,35 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
44474447
DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr));
44484448
}
44494449

4450-
// Handle alignment. If the requested alignment is less than or equal to
4451-
// the stack alignment, ignore it. If the size is greater than or equal to
4452-
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
4453-
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
4454-
if (*Alignment <= StackAlign)
4450+
// Handle alignment. If the requested alignment is less than or equal to the
4451+
// stack alignment, ignore it since we will align the size. If the requested
4452+
// alignment is greater than the stack alignment, we note this in the
4453+
// DYNAMIC_STACKALLOC node.
4454+
const TargetFrameLowering *TFI = DAG.getSubtarget().getFrameLowering();
4455+
Align StackAlign = TFI->getStackAlign();
4456+
bool IsUnderAligned = *Alignment <= StackAlign;
4457+
if (IsUnderAligned)
44554458
Alignment = std::nullopt;
44564459

4457-
const uint64_t StackAlignMask = StackAlign.value() - 1U;
4458-
// Round the size of the allocation up to the stack alignment size
4459-
// by add SA-1 to the size. This doesn't overflow because we're computing
4460-
// an address inside an alloca.
4461-
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
4462-
DAG.getConstant(StackAlignMask, dl, IntPtr),
4463-
SDNodeFlags::NoUnsignedWrap);
4464-
4465-
// Mask out the low bits for alignment purposes.
4466-
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
4467-
DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
4460+
// If the stack grows up, adding the alloca's size to SP without padding may
4461+
// leave SP not aligned (to the stack alignment) after the alloca because we
4462+
// align SP (to the stack align or alloca align) *before* adding the alloca
4463+
// size. On the other hand, if the stack grows down, we will align SP *after*
4464+
// decrementing it, so there is no need to align the size.
4465+
if (TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ||
4466+
IsUnderAligned) {
4467+
const uint64_t StackAlignMask = StackAlign.value() - 1U;
4468+
// Round the size of the allocation up to the stack alignment size
4469+
// by adding SA-1 to the size. This doesn't overflow because we're computing
4470+
// an address inside an alloca.
4471+
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
4472+
DAG.getConstant(StackAlignMask, dl, IntPtr),
4473+
SDNodeFlags::NoUnsignedWrap);
4474+
4475+
// Mask out the low bits for alignment purposes.
4476+
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
4477+
DAG.getSignedConstant(~StackAlignMask, dl, IntPtr));
4478+
}
44684479

44694480
SDValue Ops[] = {
44704481
getRoot(), AllocSize,

llvm/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,7 @@ define ptr @test_aligned_alloca(i32 %numelts) {
2828
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
2929
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
3030
; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[ZEXT]], [[C]]
31-
; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15
32-
; CHECK: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD [[MUL]], [[C1]]
33-
; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16
34-
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[ADD]], [[C2]]
35-
; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[AND]](s64), 32
31+
; CHECK: [[DYN_STACKALLOC:%[0-9]+]]:_(p0) = G_DYN_STACKALLOC [[MUL]](s64), 32
3632
; CHECK: $x0 = COPY [[DYN_STACKALLOC]](p0)
3733
; CHECK: RET_ReallyLR implicit $x0
3834
%addr = alloca i8, i32 %numelts, align 32

llvm/test/CodeGen/AArch64/sme-framelower-use-bp.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -160,11 +160,7 @@ define void @quux() #1 {
160160
; CHECK-NEXT: mov x9, sp
161161
; CHECK-NEXT: subs x9, x9, #16
162162
; CHECK-NEXT: mov sp, x9
163-
; CHECK-NEXT: addvl x9, x8, #2
164-
; CHECK-NEXT: mov w0, w9
165-
; CHECK-NEXT: // implicit-def: $x9
166-
; CHECK-NEXT: mov w9, w0
167-
; CHECK-NEXT: and x9, x9, #0x7f0
163+
; CHECK-NEXT: rdvl x9, #2
168164
; CHECK-NEXT: mov x10, sp
169165
; CHECK-NEXT: subs x10, x10, x9
170166
; CHECK-NEXT: and x10, x10, #0xffffffffffffffe0

llvm/test/CodeGen/AArch64/stack-probing-dynamic.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,10 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
107107
; CHECK-NEXT: .cfi_offset w29, -32
108108
; CHECK-NEXT: sub x9, sp, #32
109109
; CHECK-NEXT: and sp, x9, #0xffffffffffffffc0
110-
; CHECK-NEXT: add x9, x0, #15
111110
; CHECK-NEXT: mov x8, sp
112-
; CHECK-DAG: str xzr, [sp]
113-
; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
114-
; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
111+
; CHECK-NEXT: str xzr, [sp]
115112
; CHECK-DAG: mov x19, sp
116-
; CHECK-DAG: sub x8, x8, x9
113+
; CHECK-DAG: sub x8, x8, x0
117114
; CHECK-NEXT: and x8, x8, #0xffffffffffffffc0
118115
; CHECK-NEXT: .LBB2_1: // =>This Inner Loop Header: Depth=1
119116
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
@@ -167,13 +164,10 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
167164
; CHECK-NEXT: b .LBB3_1
168165
; CHECK-NEXT: .LBB3_3:
169166
; CHECK-NEXT: mov sp, x9
170-
; CHECK-NEXT: add x9, x0, #15
171167
; CHECK-NEXT: mov x8, sp
172-
; CHECK-DAG: ldr xzr, [sp]
173-
; CHECK-DAG: and x9, x9, #0xfffffffffffffff0
174-
; CHECK-NOT: INVALID_TO_BREAK_UP_CHECK_DAG
168+
; CHECK-NEXT: ldr xzr, [sp]
175169
; CHECK-DAG: mov x19, sp
176-
; CHECK-DAG: sub x8, x8, x9
170+
; CHECK-DAG: sub x8, x8, x0
177171
; CHECK-NEXT: and x8, x8, #0xffffffffffffe000
178172
; CHECK-NEXT: .LBB3_4: // =>This Inner Loop Header: Depth=1
179173
; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096

llvm/test/CodeGen/AArch64/sve-alloca.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,8 @@ define void @foo(<vscale x 4 x i64> %dst, i1 %cond) {
5454
; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 32 - 48 * VG
5555
; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 32 - 56 * VG
5656
; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 32 - 64 * VG
57-
; CHECK-NEXT: rdvl x9, #2
5857
; CHECK-NEXT: mov x8, sp
59-
; CHECK-NEXT: add x9, x9, #15
60-
; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
58+
; CHECK-NEXT: cnth x9, all, mul #4
6159
; CHECK-NEXT: sub x8, x8, x9
6260
; CHECK-NEXT: and x0, x8, #0xffffffffffffffe0
6361
; CHECK-NEXT: mov sp, x0

llvm/test/CodeGen/PowerPC/aix-framepointer-save-restore.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
8282
; AIX32-NEXT: slwi 3, 3, 2
8383
; AIX32-NEXT: lwz 4, 0(1)
8484
; AIX32-NEXT: li 5, -64
85-
; AIX32-NEXT: addi 3, 3, 15
86-
; AIX32-NEXT: mr 31, 1
87-
; AIX32-NEXT: rlwinm 3, 3, 0, 0, 27
8885
; AIX32-NEXT: neg 3, 3
86+
; AIX32-NEXT: mr 31, 1
8987
; AIX32-NEXT: and 5, 3, 5
9088
; AIX32-NEXT: stwux 4, 1, 5
9189
; AIX32-NEXT: addi 3, 1, 64
@@ -111,11 +109,8 @@ define dso_local void @frameptr_realigned(i32 %n) {
111109
; AIX64-NEXT: rldic 3, 3, 2, 30
112110
; AIX64-NEXT: ld 4, 0(1)
113111
; AIX64-NEXT: li 5, -64
114-
; AIX64-NEXT: addi 3, 3, 15
115-
; AIX64-NEXT: mr 31, 1
116-
; AIX64-NEXT: rldicl 3, 3, 60, 4
117-
; AIX64-NEXT: rldicl 3, 3, 4, 29
118112
; AIX64-NEXT: neg 3, 3
113+
; AIX64-NEXT: mr 31, 1
119114
; AIX64-NEXT: and 5, 3, 5
120115
; AIX64-NEXT: stdux 4, 1, 5
121116
; AIX64-NEXT: addi 3, 1, 128

llvm/test/CodeGen/PowerPC/pr46759.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,6 @@ define void @foo(i32 %vla_size) #0 {
3333
; CHECK-LE-NEXT: li r4, -2048
3434
; CHECK-LE-NEXT: li r6, -4096
3535
; CHECK-LE-NEXT: mr r31, r1
36-
; CHECK-LE-NEXT: addi r3, r3, 15
37-
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
38-
; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
3936
; CHECK-LE-NEXT: neg r5, r3
4037
; CHECK-LE-NEXT: ld r3, 0(r1)
4138
; CHECK-LE-NEXT: and r4, r5, r4

llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -835,20 +835,17 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
835835
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
836836
; CHECK-LE-NEXT: .cfi_offset r31, -8
837837
; CHECK-LE-NEXT: .cfi_offset r30, -16
838-
; CHECK-LE-NEXT: clrldi r3, r3, 32
839838
; CHECK-LE-NEXT: lis r5, 1
840839
; CHECK-LE-NEXT: mr r31, r1
841-
; CHECK-LE-NEXT: li r6, 1
842840
; CHECK-LE-NEXT: sldi r4, r4, 2
843-
; CHECK-LE-NEXT: addi r3, r3, 15
841+
; CHECK-LE-NEXT: li r6, 1
842+
; CHECK-LE-NEXT: clrldi r3, r3, 32
844843
; CHECK-LE-NEXT: ori r5, r5, 0
845-
; CHECK-LE-NEXT: rldicl r3, r3, 60, 4
846844
; CHECK-LE-NEXT: add r5, r31, r5
847-
; CHECK-LE-NEXT: rldicl r3, r3, 4, 31
848845
; CHECK-LE-NEXT: stwx r6, r5, r4
846+
; CHECK-LE-NEXT: neg r5, r3
849847
; CHECK-LE-NEXT: li r4, -32768
850848
; CHECK-LE-NEXT: li r6, -4096
851-
; CHECK-LE-NEXT: neg r5, r3
852849
; CHECK-LE-NEXT: ld r3, 0(r1)
853850
; CHECK-LE-NEXT: and r4, r5, r4
854851
; CHECK-LE-NEXT: mr r5, r4
@@ -896,16 +893,13 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
896893
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
897894
; CHECK-BE-NEXT: .cfi_offset r31, -8
898895
; CHECK-BE-NEXT: .cfi_offset r30, -16
899-
; CHECK-BE-NEXT: clrldi r3, r3, 32
900896
; CHECK-BE-NEXT: lis r5, 1
901-
; CHECK-BE-NEXT: addi r3, r3, 15
902897
; CHECK-BE-NEXT: mr r31, r1
903898
; CHECK-BE-NEXT: ori r5, r5, 0
904-
; CHECK-BE-NEXT: rldicl r3, r3, 60, 4
905899
; CHECK-BE-NEXT: add r5, r31, r5
906900
; CHECK-BE-NEXT: sldi r4, r4, 2
907901
; CHECK-BE-NEXT: li r6, 1
908-
; CHECK-BE-NEXT: rldicl r3, r3, 4, 31
902+
; CHECK-BE-NEXT: clrldi r3, r3, 32
909903
; CHECK-BE-NEXT: stwx r6, r5, r4
910904
; CHECK-BE-NEXT: neg r7, r3
911905
; CHECK-BE-NEXT: li r4, -32768
@@ -964,11 +958,9 @@ define void @f11(i32 %vla_size, i64 %i) #0 {
964958
; CHECK-32-NEXT: lis r4, 1
965959
; CHECK-32-NEXT: mr r31, r1
966960
; CHECK-32-NEXT: ori r4, r4, 0
967-
; CHECK-32-NEXT: addi r3, r3, 15
968961
; CHECK-32-NEXT: add r4, r31, r4
969962
; CHECK-32-NEXT: li r5, 1
970963
; CHECK-32-NEXT: slwi r6, r6, 2
971-
; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27
972964
; CHECK-32-NEXT: neg r7, r3
973965
; CHECK-32-NEXT: stwx r5, r4, r6
974966
; CHECK-32-NEXT: li r4, -32768

llvm/test/CodeGen/RISCV/rvv/stack-probing-dynamic.ll

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
180180
; RV64I-NEXT: .cfi_def_cfa s0, 0
181181
; RV64I-NEXT: andi sp, sp, -64
182182
; RV64I-NEXT: mv s1, sp
183-
; RV64I-NEXT: addi a0, a0, 15
184-
; RV64I-NEXT: andi a0, a0, -16
185183
; RV64I-NEXT: sub a0, sp, a0
186184
; RV64I-NEXT: andi a0, a0, -64
187185
; RV64I-NEXT: lui a2, 1
@@ -219,8 +217,6 @@ define void @dynamic_align_64(i64 %size, ptr %out) #0 {
219217
; RV32I-NEXT: .cfi_def_cfa s0, 0
220218
; RV32I-NEXT: andi sp, sp, -64
221219
; RV32I-NEXT: mv s1, sp
222-
; RV32I-NEXT: addi a0, a0, 15
223-
; RV32I-NEXT: andi a0, a0, -16
224220
; RV32I-NEXT: sub a0, sp, a0
225221
; RV32I-NEXT: andi a0, a0, -64
226222
; RV32I-NEXT: lui a1, 1
@@ -278,10 +274,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
278274
; RV64I-NEXT: srli a2, sp, 13
279275
; RV64I-NEXT: slli sp, a2, 13
280276
; RV64I-NEXT: mv s1, sp
281-
; RV64I-NEXT: addi a0, a0, 15
282-
; RV64I-NEXT: lui a2, 1048574
283-
; RV64I-NEXT: andi a0, a0, -16
284277
; RV64I-NEXT: sub a0, sp, a0
278+
; RV64I-NEXT: lui a2, 1048574
285279
; RV64I-NEXT: and a0, a0, a2
286280
; RV64I-NEXT: lui a2, 1
287281
; RV64I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
@@ -329,10 +323,8 @@ define void @dynamic_align_8192(i64 %size, ptr %out) #0 {
329323
; RV32I-NEXT: srli a1, sp, 13
330324
; RV32I-NEXT: slli sp, a1, 13
331325
; RV32I-NEXT: mv s1, sp
332-
; RV32I-NEXT: addi a0, a0, 15
333-
; RV32I-NEXT: lui a1, 1048574
334-
; RV32I-NEXT: andi a0, a0, -16
335326
; RV32I-NEXT: sub a0, sp, a0
327+
; RV32I-NEXT: lui a1, 1048574
336328
; RV32I-NEXT: and a0, a0, a1
337329
; RV32I-NEXT: lui a1, 1
338330
; RV32I-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1

0 commit comments

Comments
 (0)