Skip to content

Commit 386066b

Browse files
committed
[AArch64] Fix a corner case with large stack allocation
In the unlikely case where the stack size is greater than 4GB, we may run into a situation where the local stack size and the callee-saved registers' stack size get combined incorrectly when restoring the callee-saved registers. This happens because the stack size in 'shouldCombineCSRLocalStackBumpInEpilogue' is represented as an 'unsigned', but is passed in as an 'int64_t', so the value is truncated. We end up with something like: `$fp, $lr = frame-destroy LDPXi $sp, 536870912`. This change just makes 'shouldCombineCSRLocalStackBumpInEpilogue' match 'shouldCombineCSRLocalStackBump', where 'StackBumpBytes' is a 'uint64_t'.
1 parent acbd822 commit 386066b

File tree

3 files changed

+58
-4
lines changed

3 files changed

+58
-4
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
11951195
}
11961196

11971197
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
1198-
MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
1198+
MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
11991199
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
12001200
return false;
1201-
12021201
if (MBB.empty())
12031202
return true;
12041203

@@ -2363,7 +2362,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
23632362
}
23642363
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
23652364
// Assume we can't combine the last pop with the sp restore.
2366-
2365+
//
23672366
bool CombineAfterCSRBump = false;
23682367
if (!CombineSPBump && PrologueSaveSize != 0) {
23692368
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
146146
int &MinCSFrameIndex,
147147
int &MaxCSFrameIndex) const;
148148
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
149-
unsigned StackBumpBytes) const;
149+
uint64_t StackBumpBytes) const;
150150
void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
151151
MachineBasicBlock::iterator MBBI) const;
152152
void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
2+
--- |
3+
; ModuleID = 'bug.c'
4+
source_filename = "bug.c"
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
6+
target triple = "aarch64-unknown-linux-gnu"
7+
8+
; Function Attrs: mustprogress noinline optnone uwtable
9+
define dso_local noundef i32 @_Z4funcv() #0 {
10+
entry:
11+
%array = alloca [1073741824 x i32], align 4
12+
%arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
13+
store i32 7, ptr %arrayidx, align 4
14+
call void @_Z5func2v()
15+
%arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
16+
%0 = load i32, ptr %arrayidx1, align 4
17+
ret i32 %0
18+
}
19+
20+
declare void @_Z5func2v() #1
21+
22+
attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
23+
attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
24+
...
25+
---
26+
name: _Z4funcv
27+
alignment: 4
28+
legalized: true
29+
regBankSelected: true
30+
selected: true
31+
tracksRegLiveness: true
32+
noPhis: true
33+
isSSA: false
34+
noVRegs: true
35+
hasFakeUses: false
36+
frameInfo:
37+
maxAlignment: 4
38+
adjustsStack: true
39+
hasCalls: true
40+
maxCallFrameSize: 0
41+
stack:
42+
- { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
43+
machineFunctionInfo: {}
44+
body: |
45+
bb.1.entry:
46+
renamable $w8 = MOVi32imm 7
47+
STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
48+
ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
49+
BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
50+
ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
51+
renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
52+
; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp
53+
RET_ReallyLR implicit killed $w0
54+
55+
...

0 commit comments

Comments
 (0)