Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions src/device/esp/esp32s3.S
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,66 @@ call_start_cpu0:
// If main returns, loop forever.
1: j 1b

// -----------------------------------------------------------------------
// APP CPU entry point.
// -----------------------------------------------------------------------
.section .text.call_start_cpu1
// Literal pool for this routine: the three words below are read with l32r
// further down.
.literal_position
.align 4
// Value loaded into a1 as CPU1's initial stack pointer.
.Lstack1_top_addr:
.long _stack1_top
// Address of the Go function called at the end of this routine.
.Lrun_core1_entry_addr:
.long tinygo_runCore1
// Value written to VECBASE on CPU1.
.Lvector_table_addr_cpu1:
.long _vector_table

.global call_start_cpu1
call_start_cpu1:
// CPU1 starts from ROM with no stack contract that TinyGo can rely on.
// Repeat the CPU-local windowed-ABI setup from CPU0, then enter Go.

// Clear the PS_WOE bit in PS while the window state is rewritten.
rsr.ps a2
movi a3, ~(PS_WOE)
and a2, a2, a3
wsr.ps a2
rsync

// Set WINDOWSTART to 1 << WINDOWBASE: ssl takes the shift amount from the
// WINDOWBASE value and sll shifts the constant 1 left by it, so only the
// current window is marked.
rsr.windowbase a2
ssl a2
movi a2, 1
sll a2, a2
wsr.windowstart a2
rsync

// Load the stack pointer (a1) from the literal holding _stack1_top.
l32r a1, .Lstack1_top_addr

// Set PS_WOE again now that WINDOWSTART and the stack pointer are in place.
rsr.ps a2
movi a3, PS_WOE
or a2, a2, a3
wsr.ps a2
rsync

// Write 1 to CPENABLE (only bit 0 set).
// NOTE(review): presumably this enables coprocessor 0 only - confirm.
movi a2, 1
wsr.cpenable a2
rsync

// Point VECBASE at _vector_table.
l32r a8, .Lvector_table_addr_cpu1
wsr.vecbase a8
rsync

// Clear PS bits 0-4 and set bit 5 (0x20). Per the Xtensa PS layout these are
// INTLEVEL/EXCM (cleared) and UM (set).
rsr.ps a2
movi a3, ~0x1F
and a2, a2, a3
movi a3, 0x20
or a2, a2, a3
wsr.ps a2
rsync

// callx4 rotates the register window by four registers, so the value placed
// in a5 here is what the callee sees as a1 (its stack pointer).
mov a5, a1
l32r a4, .Lrun_core1_entry_addr
callx4 a4

// Spin forever if tinygo_runCore1 ever returns.
1: j 1b

// -----------------------------------------------------------------------
// tinygo_scanCurrentStack — Spill all Xtensa register windows to the
// stack, then call tinygo_scanstack(sp) so the conservative GC can
Expand Down
15 changes: 9 additions & 6 deletions src/internal/task/task_stack_esp32.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//go:build scheduler.tasks && (esp32 || esp32s3)
//go:build (scheduler.tasks || scheduler.cores) && (esp32 || esp32s3)

package task

Expand All @@ -12,10 +12,12 @@ package task
// https://0x04.net/~mwk/doc/xtensa.pdf

import (
_ "unsafe"
"unsafe"
)

var systemStack uintptr
//go:linkname runtime_systemStackPtr runtime.systemStackPtr
func runtime_systemStackPtr() *uintptr

// calleeSavedRegs is the list of registers that must be saved and restored when
// switching between tasks. Also see task_stack_esp8266.S that relies on the
Expand Down Expand Up @@ -60,19 +62,20 @@ func (s *state) archInit(r *calleeSavedRegs, fn uintptr, args unsafe.Pointer) {
}

func (s *state) resume() {
swapTask(s.sp, &systemStack)
swapTask(s.sp, runtime_systemStackPtr())
}

func (s *state) pause() {
newStack := systemStack
systemStack = 0
systemStackPtr := runtime_systemStackPtr()
newStack := *systemStackPtr
*systemStackPtr = 0
swapTask(newStack, &s.sp)
}

// SystemStack returns the system stack pointer when called from a task stack.
// When called from the system stack, it returns 0.
func SystemStack() uintptr {
return systemStack
return *runtime_systemStackPtr()
}

//export tinygo_task_current
Expand Down
217 changes: 217 additions & 0 deletions src/runtime/runtime_esp32s3_cores.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
//go:build esp32s3 && scheduler.cores

package runtime

import (
"device"
"device/esp"
"internal/task"
"runtime/interrupt"
"runtime/volatile"
"sync/atomic"
"unsafe"
)

// Fixed multicore parameters for this target.
const (
	// numCPU is the number of cores handled by this file; it sizes the
	// per-core crosscoreReason array.
	numCPU = 2

	// crosscoreCPUInt is the CPU interrupt number that the cross-core
	// interrupt sources are mapped to (see initCrosscoreInterrupt).
	crosscoreCPUInt = 12
)

// Reason bits OR-ed into crosscoreReason[core] before a cross-core interrupt
// is raised on that core.
const (
	crosscoreReasonWake = 1 << 0
	crosscoreReasonGC   = 1 << 1
)

// Spin locks shared between the cores. Only schedulerLock is taken in this
// file; the others are presumably used elsewhere in the runtime.
var (
	printLock     spinLock
	schedulerLock spinLock
	atomicsLock   spinLock
	futexLock     spinLock
)

// Cross-core bookkeeping.
var (
	// sleepingCore is the core currently inside sleepTicksMulticore, or 0xff
	// when no core is.
	sleepingCore uint8 = 0xff

	// waitingCores has bit N set while core N is parked in
	// schedulerUnlockAndWait.
	waitingCores uint8

	// cpu1Started is stored as 1 by runCore1 and polled by
	// startSecondaryCores.
	cpu1Started atomic.Uint32

	// crosscoreReason holds, per core, the crosscoreReason* bits that have
	// been posted but not yet consumed by handleCrosscoreInterrupt.
	crosscoreReason [numCPU]atomic.Uint32

	// gcSignalWait is set to 1 by gcSignalCore and polled/cleared by
	// gcInterruptHandler.
	gcSignalWait volatile.Register8
)

// hasSleepingCore reports whether sleepingCore currently names a core, i.e.
// holds anything other than the 0xff "none" marker.
func hasSleepingCore() bool {
return sleepingCore != 0xff
}

// sleepTicksMulticore sleeps for d ticks on the calling core with the
// scheduler lock released.
//
// It records the calling core in sleepingCore, unlocks schedulerLock around
// the sleepTicks call, then re-locks it and resets sleepingCore to 0xff.
// The caller is presumably expected to hold schedulerLock on entry, since
// the first lock operation here is an Unlock.
func sleepTicksMulticore(d timeUnit) {
sleepingCore = uint8(currentCPU())
schedulerLock.Unlock()
sleepTicks(d)
schedulerLock.Lock()
sleepingCore = 0xff
}

// interruptSleepTicksMulticore ignores the requested wakeup time and simply
// calls schedulerWake.
//
// NOTE(review): schedulerWake only targets cores flagged in waitingCores,
// and sleepTicksMulticore sets sleepingCore but never waitingCores, so a core
// inside sleepTicks does not appear to be interrupted by this call - confirm
// whether that is intended.
func interruptSleepTicksMulticore(wakeup timeUnit) {
_ = wakeup
schedulerWake()
}

// schedulerUnlockAndWait parks the calling core until an interrupt arrives.
//
// The core flags itself in waitingCores, drops schedulerLock, executes
// "waiti 0", and on wake-up re-takes schedulerLock before clearing its flag.
//
// NOTE(review): an interrupt delivered between the Unlock and the waiti
// would be handled before the core starts waiting - confirm this window is
// acceptable.
func schedulerUnlockAndWait() {
	selfBit := uint8(1) << currentCPU()

	waitingCores |= selfBit
	schedulerLock.Unlock()

	device.Asm("waiti 0")

	schedulerLock.Lock()
	waitingCores &^= selfBit
}

// schedulerWake sends a wake cross-core interrupt to one waiting core.
//
// It does nothing when no core is flagged in waitingCores. Otherwise the
// other core is preferred; if that one is not waiting, the interrupt is sent
// to the calling core instead.
func schedulerWake() {
	if waitingCores == 0 {
		return
	}

	self := currentCPU()
	target := self ^ 1
	if waitingCores&(1<<target) == 0 {
		// The other core is not parked, so fall back to this one.
		target = self
	}
	sendCrosscoreInterrupt(target, crosscoreReasonWake)
}

// currentCPU returns the index (0 or 1) of the core executing the call,
// taken from bit 13 of the PRID special register.
func currentCPU() uint32 {
	const coreBit = 13

	prid := uintptr(device.AsmFull("rsr.prid {}", nil))
	return uint32(prid>>coreBit) & 1
}

// startSecondaryCores brings up core 1 from core 0.
//
// Steps, in order: install the cross-core interrupt for core 0, clear the
// two RTC_CNTL software-stall fields for the APP CPU, enable core 1's clock
// gate, clear its run-stall, pulse its reset bit, and finally hand the ROM
// the address of call_start_cpu1 as the boot address. It then spins until
// cpu1Started becomes non-zero.
//
// The wait is bounded to 1,000,000 spin iterations; if core 1 has not
// reported in by then the function returns without signalling an error.
func startSecondaryCores() {
initCrosscoreInterrupt(0)

// Release the software stall on the APP CPU.
esp.RTC_CNTL.SetOPTIONS0_SW_STALL_APPCPU_C0(0)
esp.RTC_CNTL.SetSW_CPU_STALL_SW_STALL_APPCPU_C1(0)

// Clock on, run-stall off, then pulse reset (1 followed by 0).
esp.SYSTEM.SetCORE_1_CONTROL_0_CONTROL_CORE_1_CLKGATE_EN(1)
esp.SYSTEM.SetCORE_1_CONTROL_0_CONTROL_CORE_1_RUNSTALL(0)
esp.SYSTEM.SetCORE_1_CONTROL_0_CONTROL_CORE_1_RESETING(1)
esp.SYSTEM.SetCORE_1_CONTROL_0_CONTROL_CORE_1_RESETING(0)

// Only the address of the call_start_cpu1 symbol is used here.
etsSetAppCPUBootAddr(uint32(uintptr(unsafe.Pointer(&callStartCPU1))))

// Bounded wait for runCore1 to store 1 into cpu1Started.
for i := 0; i < 1000000 && cpu1Started.Load() == 0; i++ {
spinLoopWait()
}
}

// gcPauseCore posts the GC reason to the given core and raises its
// cross-core interrupt.
func gcPauseCore(core uint32) {
sendCrosscoreInterrupt(core, crosscoreReasonGC)
}

// gcSignalCore sets gcSignalWait to 1, which releases a core spinning in
// gcInterruptHandler, and also posts the GC reason to the given core with a
// cross-core interrupt.
func gcSignalCore(core uint32) {
gcSignalWait.Set(1)
sendCrosscoreInterrupt(core, crosscoreReasonGC)
}

// coreStackTop returns the address of the stack-top linker symbol for the
// given core: stackTopSymbol for core 0 and stack1TopSymbol for core 1.
// Any other core number triggers a runtime panic.
func coreStackTop(core uint32) uintptr {
	if core == 0 {
		return uintptr(unsafe.Pointer(&stackTopSymbol))
	}
	if core == 1 {
		return uintptr(unsafe.Pointer(&stack1TopSymbol))
	}

	runtimePanic("unexpected core")
	return 0
}

// spinLoopWait is the body of every busy-wait loop in this file; it executes
// a single nop instruction.
func spinLoopWait() {
device.Asm("nop")
}

// runCore1 is the Go entry point for core 1, exported under the symbol name
// tinygo_runCore1.
//
// It initialises interrupts, installs the cross-core interrupt for core 1,
// resets the APP CPU boot address to 0, publishes cpu1Started = 1 (which
// startSecondaryCores polls), and then runs the scheduler while holding
// schedulerLock. If the scheduler returns, the lock is released and
// exit(0) is called.
//
//export tinygo_runCore1
func runCore1() {
interruptInit()
initCrosscoreInterrupt(1)
etsSetAppCPUBootAddr(0)
cpu1Started.Store(1)
schedulerLock.Lock()
scheduler(false)
schedulerLock.Unlock()
exit(0)
}

// initCrosscoreInterrupt routes the cross-core interrupt source for the
// given core to CPU interrupt crosscoreCPUInt and enables the handler.
//
// Core 0 uses the FROM_CPU_0 mapping on INTERRUPT_CORE0; every other value
// of core uses the FROM_CPU_1 mapping on INTERRUPT_CORE1.
func initCrosscoreInterrupt(core uint32) {
	switch core {
	case 0:
		esp.INTERRUPT_CORE0.SetCPU_INTR_FROM_CPU_0_MAP(crosscoreCPUInt)
	default:
		esp.INTERRUPT_CORE1.SetCPU_INTR_FROM_CPU_1_MAP(crosscoreCPUInt)
	}

	handler := interrupt.New(crosscoreCPUInt, crosscoreInterruptHandler)
	// The result of Enable is deliberately discarded here.
	_ = handler.Enable()
}

// crosscoreInterruptHandler is the callback registered for crosscoreCPUInt.
// It ignores its interrupt argument and dispatches on the core that is
// executing it.
func crosscoreInterruptHandler(interrupt.Interrupt) {
handleCrosscoreInterrupt(currentCPU())
}

// sendCrosscoreInterrupt posts reason to the target core and raises that
// core's cross-core interrupt.
//
// The reason bits are OR-ed into crosscoreReason[core] before the interrupt
// register is written, so the handler finds them when it runs. Core 0 uses
// CPU_INTR_FROM_CPU_0; any other core value uses CPU_INTR_FROM_CPU_1.
func sendCrosscoreInterrupt(core uint32, reason uint32) {
	crosscoreReason[core].Or(reason)

	switch core {
	case 0:
		esp.SYSTEM.SetCPU_INTR_FROM_CPU_0(1)
	default:
		esp.SYSTEM.SetCPU_INTR_FROM_CPU_1(1)
	}
}

// clearCrosscoreInterrupt writes 0 to the cross-core interrupt register that
// sendCrosscoreInterrupt sets for the given core (FROM_CPU_0 for core 0,
// FROM_CPU_1 otherwise).
func clearCrosscoreInterrupt(core uint32) {
	if core != 0 {
		esp.SYSTEM.SetCPU_INTR_FROM_CPU_1(0)
		return
	}
	esp.SYSTEM.SetCPU_INTR_FROM_CPU_0(0)
}

// handleCrosscoreInterrupt services a cross-core interrupt on the given core.
//
// The interrupt register is cleared first, then the pending reason bits are
// atomically taken and reset to 0. Only the GC reason leads to further work;
// any other reason needs nothing beyond the interrupt having been taken.
func handleCrosscoreInterrupt(core uint32) {
	clearCrosscoreInterrupt(core)

	pending := crosscoreReason[core].Swap(0)
	if pending&crosscoreReasonGC == 0 {
		return
	}
	gcInterruptHandler(core)
}

// gcInterruptHandler runs on a core that received the GC cross-core reason.
// hartID is the index of the core executing the handler; it is used only to
// look up that core's system stack top.
//
// The handler steps through a handshake using gcScanState (declared outside
// this file) and gcSignalWait:
//  1. increment gcScanState, then spin until gcSignalWait becomes non-zero
//     and clear it;
//  2. scan the current stack, and when running on a task stack also mark the
//     system stack range up to coreStackTop(hartID);
//  3. store 1 into gcScanState, then spin again until gcSignalWait becomes
//     non-zero and clear it;
//  4. increment gcScanState once more before returning.
//
// NOTE(review): the meaning the collector attaches to each gcScanState value
// is not visible here - confirm against the GC implementation.
func gcInterruptHandler(hartID uint32) {
gcScanState.Add(1)
// Wait for the first release signal.
for gcSignalWait.Get() == 0 {
spinLoopWait()
}
gcSignalWait.Set(0)

scanCurrentStack()
if !task.OnSystemStack() {
// On a task stack the system stack must be marked separately.
markRoots(task.SystemStack(), coreStackTop(hartID))
}

gcScanState.Store(1)
// Wait for the second release signal.
for gcSignalWait.Get() == 0 {
spinLoopWait()
}
gcSignalWait.Set(0)
gcScanState.Add(1)
}

// spinLock is a busy-waiting mutual-exclusion lock built on an embedded
// atomic.Uint32: 0 means unlocked, 1 means locked.
type spinLock struct {
	atomic.Uint32
}

// Lock spins until it manages to change the lock word from 0 to 1.
func (l *spinLock) Lock() {
	for {
		if l.CompareAndSwap(0, 1) {
			return
		}
		spinLoopWait()
	}
}

// Unlock stores 0 into the lock word. When schedulerAsserts is enabled it
// first panics if the lock word is not 1.
func (l *spinLock) Unlock() {
	if schedulerAsserts {
		if held := l.Load(); held != 1 {
			runtimePanic("unlock of unlocked spinlock")
		}
	}
	l.Store(0)
}

// stack1TopSymbol stands for the external symbol _stack1_top. It has zero
// size; only its address is used (see coreStackTop).
//
//go:extern _stack1_top
var stack1TopSymbol [0]uint32

// callStartCPU1 stands for the external symbol call_start_cpu1. It has zero
// size; only its address is used, as the boot address handed to
// etsSetAppCPUBootAddr.
//
//go:extern call_start_cpu1
var callStartCPU1 [0]uint32

// etsSetAppCPUBootAddr is bound to the external function
// ets_set_appcpu_boot_addr and has no Go body. This file calls it with the
// address of call_start_cpu1 to start core 1, and with 0 once core 1 is
// running.
//
//go:linkname etsSetAppCPUBootAddr ets_set_appcpu_boot_addr
func etsSetAppCPUBootAddr(addr uint32)
4 changes: 4 additions & 0 deletions targets/esp32s3.ld
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ SECTIONS
. = ALIGN(16);
. += _stack_size;
_stack_top = .;
. = ALIGN(16);
. += _stack_size;
_stack1_top = .;
} >DRAM

/* Global variables that are mutable and zero-initialized. */
Expand Down Expand Up @@ -180,6 +183,7 @@ memset = 0x400011e8;
memcpy = 0x400011f4;
memmove = 0x40001200;
memcmp = 0x4000120c;
ets_set_appcpu_boot_addr = 0x40000720;

/* From ESP-IDF:
* components/esp_rom/esp32/ld/esp32.rom.libgcc.ld
Expand Down