Skip to content

Commit 3d3e481

Browse files
aykevl authored and deadprogram committed
runtime: use MSP/PSP registers for scheduling on Cortex-M
The Cortex-M architecture contains two stack pointers, designed to be used by RTOSes: MSP and PSP (where MSP is the default at reset). In fact, the ARM documentation recommends using the PSP for tasks in an RTOS. This commit switches to using the PSP for goroutine stacks. Aside from being the recommended operation, this has the big advantage that the NVIC automatically switches to the MSP when handling interrupts. This avoids having to make every goroutine stack big enough that interrupts can be handled on it. Additionally, I've optimized the assembly code to save/restore registers (made possible by this change). For Cortex-M3 and up, saving all registers is just a single push instruction and restoring+branching is a single pop instruction. For Cortex-M0 it's a bit more work because the push/pop instructions there don't support most high registers. Sidenote: the fact that you can pop a number of registers and branch at the same time makes ARM not exactly a true RISC system. However, it's very useful in this case.
1 parent ea5df0f commit 3d3e481

File tree

2 files changed

+106
-82
lines changed

2 files changed

+106
-82
lines changed

src/runtime/scheduler_cortexm.S

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -19,76 +19,96 @@ tinygo_startTask:
1919
// After return, exit this goroutine. This is a tail call.
2020
bl runtime.yield
2121

22+
.section .text.tinygo_getSystemStackPointer
23+
.global tinygo_getSystemStackPointer
24+
.type tinygo_getSystemStackPointer, %function
25+
tinygo_getSystemStackPointer:
26+
// The system stack pointer is always stored in the MSP register.
27+
mrs r0, MSP
28+
bx lr
29+
30+
31+
// switchToScheduler and switchToTask are also in the same section, to make sure
32+
// relative branches work.
2233
.section .text.tinygo_swapTask
34+
35+
.global tinygo_switchToScheduler
36+
.type tinygo_switchToScheduler, %function
37+
tinygo_switchToScheduler:
38+
// r0 = oldTask *task
39+
40+
// Currently on the task stack (SP=PSP). We need to store the position on
41+
// the stack where the in-use registers will be stored.
42+
mov r1, sp
43+
subs r1, #36
44+
str r1, [r0, #36]
45+
46+
b tinygo_swapTask
47+
48+
.global tinygo_switchToTask
49+
.type tinygo_switchToTask, %function
50+
tinygo_switchToTask:
51+
// r0 = newTask *task
52+
53+
// Currently on the scheduler stack (SP=MSP). We'll have to update the PSP,
54+
// and then we can invoke swapTask.
55+
ldr r0, [r0, #36]
56+
msr PSP, r0
57+
58+
// Continue executing in the swapTask function, which swaps the stack
59+
// pointer.
60+
2361
.global tinygo_swapTask
2462
.type tinygo_swapTask, %function
2563
tinygo_swapTask:
26-
// r0 = oldTask *task
27-
// r1 = newTask *task
28-
29-
// This function stores the current register state to a task struct and
30-
// loads the state of another task to replace the current state. Apart from
31-
// saving and restoring all relevant callee-saved registers, it also ends
32-
// with branching to the last program counter (saved as the lr register, to
33-
// follow the ARM calling convention).
64+
// This function stores the current register state to the stack, switches to
65+
// the other stack (MSP/PSP), and loads the register state from the other
66+
// stack. Apart from saving and restoring all relevant callee-saved
67+
// registers, it also ends with branching to the last program counter (saved
68+
// as the lr register, to follow the ARM calling convention).
3469

3570
// On pre-Thumb2 CPUs (Cortex-M0 in particular), registers r8-r15 cannot be
3671
// used directly. Only very few operations work on them, such as mov. That's
3772
// why the higher register values are first stored in the temporary register
3873
// r3 when loading/storing them.
74+
// It is possible to reduce the swapTask by two instructions (~2 cycles) on
75+
// Cortex-M0 by reordering the layout of the pushed registers from {r4-r11,
76+
// lr} to {r8-r11, r4-r7, lr}. However, that also requires a change on the
77+
// Go side (depending on thumb1/thumb2!) and so is not really worth the
78+
// complexity.
3979

4080
// Store state to old task. It saves the lr instead of the pc, because that
4181
// will be the pc after returning back to the old task (in a different
4282
// invocation of swapTask).
43-
str r4, [r0, #0]
44-
str r5, [r0, #4]
45-
str r6, [r0, #8]
46-
str r7, [r0, #12]
4783
#if defined(__thumb2__)
48-
str r8, [r0, #16]
49-
str r9, [r0, #20]
50-
str r10, [r0, #24]
51-
str r11, [r0, #28]
52-
str sp, [r0, #32]
53-
str lr, [r0, #36]
84+
push {r4-r11, lr}
5485
#else
55-
mov r3, r8
56-
str r3, [r0, #16]
57-
mov r3, r9
58-
str r3, [r0, #20]
59-
mov r3, r10
60-
str r3, [r0, #24]
86+
mov r0, r8
87+
mov r1, r9
88+
mov r2, r10
6189
mov r3, r11
62-
str r3, [r0, #28]
63-
mov r3, sp
64-
str r3, [r0, #32]
65-
mov r3, lr
66-
str r3, [r0, #36]
90+
push {r0-r3, lr}
91+
push {r4-r7}
6792
#endif
6893

94+
// Switch the stack. This could either switch from PSP to MSP, or from MSP
95+
// to PSP. By using an XOR (eor), it will just switch to the other stack.
96+
mrs r0, CONTROL // load CONTROL register
97+
movs r3, #2
98+
eors r0, r0, r3 // flip the SPSEL (active stack pointer) bit
99+
msr CONTROL, r0 // store CONTROL register
100+
isb // required to flush the pipeline
101+
69102
// Load state from new task and branch to the previous position in the
70103
// program.
71-
ldr r4, [r1, #0]
72-
ldr r5, [r1, #4]
73-
ldr r6, [r1, #8]
74-
ldr r7, [r1, #12]
75104
#if defined(__thumb2__)
76-
ldr r8, [r1, #16]
77-
ldr r9, [r1, #20]
78-
ldr r10, [r1, #24]
79-
ldr r11, [r1, #28]
80-
ldr sp, [r1, #32]
105+
pop {r4-r11, pc}
81106
#else
82-
ldr r3, [r1, #16]
83-
mov r8, r3
84-
ldr r3, [r1, #20]
85-
mov r9, r3
86-
ldr r3, [r1, #24]
87-
mov r10, r3
88-
ldr r3, [r1, #28]
107+
pop {r4-r7}
108+
pop {r0-r3}
109+
mov r8, r0
110+
mov r9, r1
111+
mov r10, r2
89112
mov r11, r3
90-
ldr r3, [r1, #32]
91-
mov sp, r3
113+
pop {pc}
92114
#endif
93-
ldr r3, [r1, #36]
94-
bx r3

src/runtime/scheduler_tasks.go

Lines changed: 37 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ const stackSize = 1024
1212
const stackCanary = uintptr(uint64(0x670c1333b83bf575) & uint64(^uintptr(0)))
1313

1414
var (
15-
schedulerState = task{canary: stackCanary}
16-
currentTask *task // currently running goroutine, or nil
15+
currentTask *task // currently running goroutine, or nil
1716
)
1817

1918
// This type points to the bottom of the goroutine stack and contains some state
@@ -22,10 +21,10 @@ var (
2221
type task struct {
2322
// The order of fields in this struct must be kept in sync with assembly!
2423
calleeSavedRegs
25-
sp uintptr
2624
pc uintptr
25+
sp uintptr
2726
taskState
28-
canary uintptr // used to detect stack overflows
27+
canaryPtr *uintptr // used to detect stack overflows
2928
}
3029

3130
// getCoroutine returns the currently executing goroutine. It is used as an
@@ -47,26 +46,24 @@ func (t *task) state() *taskState {
4746
// to the scheduler.
4847
func (t *task) resume() {
4948
currentTask = t
50-
swapTask(&schedulerState, t)
49+
switchToTask(t)
5150
currentTask = nil
5251
}
5352

54-
// swapTask saves the current state to oldTask (which must contain the current
55-
// task state) and switches to newTask. Note that this function usually does
56-
// return, when another task (perhaps newTask) switches back to the current
57-
// task.
58-
//
59-
// As an additional protection, before switching tasks, it checks whether this
60-
// goroutine has overflowed the stack.
61-
func swapTask(oldTask, newTask *task) {
62-
if oldTask.canary != stackCanary {
63-
runtimePanic("goroutine stack overflow")
64-
}
65-
swapTaskLower(oldTask, newTask)
66-
}
53+
// switchToScheduler saves the current state on the stack, saves the current
54+
// stack pointer in the task, and switches to the scheduler. It must only be
55+
// called when actually running on this task.
56+
// When it returns, the scheduler has switched back to this task (for example,
57+
// after a blocking operation completed).
58+
//export tinygo_switchToScheduler
59+
func switchToScheduler(t *task)
6760

68-
//go:linkname swapTaskLower tinygo_swapTask
69-
func swapTaskLower(oldTask, newTask *task)
61+
// switchToTask switches from the scheduler to the task. It must only be called
62+
// from the scheduler.
63+
// When this function returns, the task just yielded control back to the
64+
// scheduler.
65+
//export tinygo_switchToTask
66+
func switchToTask(t *task)
7067

7168
// startTask is a small wrapper function that sets up the first (and only)
7269
// argument to the new goroutine and makes sure it is exited when the goroutine
@@ -79,11 +76,20 @@ var startTask [0]uint8
7976
// adds it to the runqueue.
8077
func startGoroutine(fn, args uintptr) {
8178
stack := alloc(stackSize)
82-
t := (*task)(stack)
83-
t.sp = uintptr(stack) + stackSize
79+
t := (*task)(unsafe.Pointer(uintptr(stack) + stackSize - unsafe.Sizeof(task{})))
80+
81+
// Set up the stack canary, a random number that should be checked when
82+
// switching from the task back to the scheduler. The stack canary pointer
83+
// points to the first word of the stack. If it has changed between now and
84+
// the next stack switch, there was a stack overflow.
85+
t.canaryPtr = (*uintptr)(unsafe.Pointer(stack))
86+
*t.canaryPtr = stackCanary
87+
88+
// Store the initial sp/pc for the startTask function (implemented in
89+
// assembly).
90+
t.sp = uintptr(stack) + stackSize - unsafe.Sizeof(task{})
8491
t.pc = uintptr(unsafe.Pointer(&startTask))
8592
t.prepareStartTask(fn, args)
86-
t.canary = stackCanary
8793
scheduleLogTask(" start goroutine:", t)
8894
runqueuePushBack(t)
8995
}
@@ -92,17 +98,15 @@ func startGoroutine(fn, args uintptr) {
9298
// any wakeups must be configured before calling yield
9399
//export runtime.yield
94100
func yield() {
95-
swapTask(currentTask, &schedulerState)
101+
// Check whether the canary (the lowest address of the stack) is still
102+
// valid. If it is not, a stack overflow has occured.
103+
if *currentTask.canaryPtr != stackCanary {
104+
runtimePanic("goroutine stack overflow")
105+
}
106+
switchToScheduler(currentTask)
96107
}
97108

98109
// getSystemStackPointer returns the current stack pointer of the system stack.
99110
// This is not necessarily the same as the current stack pointer.
100-
func getSystemStackPointer() uintptr {
101-
if currentTask == nil {
102-
// Currently on the system stack.
103-
return getCurrentStackPointer()
104-
} else {
105-
// Currently in a goroutine.
106-
return schedulerState.sp
107-
}
108-
}
111+
//export tinygo_getSystemStackPointer
112+
func getSystemStackPointer() uintptr

0 commit comments

Comments
 (0)