Skip to content

Commit 719dfcf

Browse files
committed
cmd/compile: redo arm64 LR/FP save and restore
Instead of storing LR (the return address) at 0(SP) and the FP (parent's frame pointer) at -8(SP), store them at framesize-8(SP) and framesize-16(SP), respectively.

We push and pop data onto the stack such that we're never accessing anything below SP.

The prolog/epilog lengths are unchanged (3 insns for a typical prolog, 2 for a typical epilog).

We use 8 bytes more per frame.

Typical prologue:

    STP.W (FP, LR), -16(SP)
    MOVD SP, FP
    SUB $C, SP

Typical epilogue:

    ADD $C, SP
    LDP.P 16(SP), (FP, LR)
    RET

The previous word where we stored LR, at 0(SP), is now unused. We could repurpose that slot for storing a local variable.

The new prolog and epilog instructions are recognized by libunwind, so pc-sampling tools like perf should now be accurate. (TODO: except maybe after the first RET instruction? Have to look into that.)

Update golang#73753 (fixes, for arm64)
Update golang#57302 (Quim thinks this will help on that issue)

Change-Id: I4800036a9a9a08aaaf35d9f99de79a36cf37ebb8
Reviewed-on: https://go-review.googlesource.com/c/go/+/674615
Reviewed-by: David Chase <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
1 parent f331212 commit 719dfcf

File tree

23 files changed

+303
-361
lines changed

23 files changed

+303
-361
lines changed

src/cmd/compile/abi-internal.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -576,19 +576,19 @@ A function's stack frame, after the frame is created, is laid out as
576576
follows:
577577

578578
+------------------------------+
579+
| return PC |
580+
| frame pointer on entry | ← R29 points to
579581
| ... locals ... |
580582
| ... outgoing arguments ... |
581-
| return PC | ← RSP points to
582-
| frame pointer on entry |
583+
| unused word | ← RSP points to
583584
+------------------------------+ ↓ lower addresses
584585

585586
The "return PC" is loaded to the link register, R30, as part of the
586587
arm64 `CALL` operation.
587588

588-
On entry, a function subtracts from RSP to open its stack frame, and
589-
saves the values of R30 and R29 at the bottom of the frame.
590-
Specifically, R30 is saved at 0(RSP) and R29 is saved at -8(RSP),
591-
after RSP is updated.
589+
On entry, a function pushes R30 (the return address) and R29
590+
(the caller's frame pointer) onto the bottom of the stack. It then
591+
subtracts a constant from RSP to open its stack frame.
592592

593593
A leaf function that does not require any stack space may omit the
594594
saved R30 and R29.

src/cmd/compile/internal/arm64/ggen.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@ import (
1111
)
1212

1313
func padframe(frame int64) int64 {
14-
// arm64 requires that the frame size (not counting saved FP&LR)
15-
// be 16 bytes aligned. If not, pad it.
16-
if frame%16 != 0 {
17-
frame += 16 - (frame % 16)
14+
// arm64 requires frame sizes here that are 8 mod 16.
15+
// With the additional (unused) slot at the bottom of the frame,
16+
// that makes an aligned 16 byte frame.
17+
// Adding a save region for LR+FP does not change the alignment.
18+
if frame != 0 {
19+
frame += (-(frame + 8)) & 15
1820
}
1921
return frame
2022
}

src/cmd/compile/internal/arm64/ssa.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
221221

222222
for i := 0; i < len(args); i++ {
223223
a := args[i]
224-
// Offset by size of the saved LR slot.
224+
// Offset by size of the unused slot before start of args.
225225
addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
226226
// Look for double-register operations if we can.
227227
if i < len(args)-1 {

src/cmd/compile/internal/ssagen/pgen.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,10 +393,16 @@ func StackOffset(slot ssa.LocalSlot) int32 {
393393
case ir.PAUTO:
394394
off = n.FrameOffset()
395395
if base.Ctxt.Arch.FixedFrameSize == 0 {
396+
// x86 return address
396397
off -= int64(types.PtrSize)
397398
}
398399
if buildcfg.FramePointerEnabled {
400+
// frame pointer
399401
off -= int64(types.PtrSize)
402+
if buildcfg.GOARCH == "arm64" {
403+
// arm64 return address also
404+
off -= int64(types.PtrSize)
405+
}
400406
}
401407
}
402408
return int32(off + slot.Off)

src/cmd/compile/internal/ssagen/ssa.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7150,14 +7150,15 @@ func defframe(s *State, e *ssafn, f *ssa.Func) {
71507150
// Insert code to zero ambiguously live variables so that the
71517151
// garbage collector only sees initialized values when it
71527152
// looks for pointers.
7153+
// Note: lo/hi are offsets from varp and will be negative.
71537154
var lo, hi int64
71547155

71557156
// Opaque state for backend to use. Current backends use it to
71567157
// keep track of which helper registers have been zeroed.
71577158
var state uint32
71587159

71597160
// Iterate through declarations. Autos are sorted in decreasing
7160-
// frame offset order.
7161+
// frame offset order (least negative to most negative).
71617162
for _, n := range e.curfn.Dcl {
71627163
if !n.Needzero() {
71637164
continue

src/cmd/internal/obj/arm64/asm7.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ type ctxt7 struct {
5151
blitrl *obj.Prog
5252
elitrl *obj.Prog
5353
autosize int32
54-
extrasize int32
5554
instoffset int64
5655
pc int64
5756
pool struct {
@@ -1122,8 +1121,7 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
11221121
ctxt.Diag("arm64 ops not initialized, call arm64.buildop first")
11231122
}
11241123

1125-
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
1126-
p.To.Offset &= 0xffffffff // extrasize is no longer needed
1124+
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)}
11271125

11281126
// Process literal pool and allocate initial program counter for each Prog, before
11291127
// generating branch veneers.
@@ -2119,8 +2117,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
21192117
// a.Offset is still relative to pseudo-SP.
21202118
a.Reg = obj.REG_NONE
21212119
}
2122-
// The frame top 8 or 16 bytes are for FP
2123-
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
2120+
// The frame top 16 bytes are for LR/FP
2121+
c.instoffset = int64(c.autosize) + a.Offset - extrasize
21242122
return autoclass(c.instoffset)
21252123

21262124
case obj.NAME_PARAM:
@@ -2180,8 +2178,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
21802178
// a.Offset is still relative to pseudo-SP.
21812179
a.Reg = obj.REG_NONE
21822180
}
2183-
// The frame top 8 or 16 bytes are for FP
2184-
c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize)
2181+
// The frame top 16 bytes are for LR/FP
2182+
c.instoffset = int64(c.autosize) + a.Offset - extrasize
21852183

21862184
case obj.NAME_PARAM:
21872185
if a.Reg == REGSP {

0 commit comments

Comments
 (0)