Commit 4ebf295

prattmic authored and gopherbot committed
runtime: prefer to restart Ps on the same M after STW
Today, Ps jump around arbitrarily across STW. Instead, try to keep the P on the previous M it ran on. In the future, we'll likely want to try to expand this beyond STW to create a more general affinity for specific Ms.

For this to be useful, the Ps need to have runnable Gs. Today, STW preemption goes through goschedImpl, which places the G on the global run queue. If that was the only G then the P won't have runnable goroutines anymore. It makes more sense to keep the G with its P across STW anyway, so add a special case to goschedImpl for that.

On my machine, this CL reduces the error rate in TestTraceSTW from 99.8% to 1.9%.

As a nearly 2% error rate shows, there are still cases where this best effort scheduling doesn't work. The most obvious is that while procresize assigns Ps back to their original M, startTheWorldWithSema calls wakep to start a spinning M. The spinning M may steal a goroutine from another P if that P is too slow to start.

For #65694.

Change-Id: I6a6a636c0969c587d039b68bc68ea16c74ff1fc9
Reviewed-on: https://go-review.googlesource.com/c/go/+/714801
Reviewed-by: Michael Knyszek <[email protected]>
Auto-Submit: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent: 625d8e9

6 files changed: +656 −9 lines changed

src/internal/trace/testtrace/helpers.go

Lines changed: 1 addition & 2 deletions

```diff
@@ -16,8 +16,7 @@ import (
 	"testing"
 )
 
-// MustHaveSyscallEvents skips the current test if the current
-// platform does not support true system call events.
+// Dump saves the trace to a file or the test log.
 func Dump(t *testing.T, testName string, traceBytes []byte, forceToFile bool) {
 	onBuilder := testenv.Builder() != ""
 	onOldBuilder := !strings.Contains(testenv.Builder(), "gotip") && !strings.Contains(testenv.Builder(), "go1")
```

src/runtime/proc.go

Lines changed: 76 additions & 7 deletions

```diff
@@ -1009,6 +1009,8 @@ func mcommoninit(mp *m, id int64) {
 		mp.id = mReserveID()
 	}
 
+	mp.self = newMWeakPointer(mp)
+
 	mrandinit(mp)
 
 	mpreinit(mp)
@@ -2018,6 +2020,10 @@ func mexit(osStack bool) {
 	// Free vgetrandom state.
 	vgetrandomDestroy(mp)
 
+	// Clear the self pointer so Ps don't access this M after it is freed,
+	// or keep it alive.
+	mp.self.clear()
+
 	// Remove m from allm.
 	lock(&sched.lock)
 	for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink {
```
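The `mp.self` handle added above is what lets a P remember its last M without risking a dangling pointer: `mcommoninit` publishes the handle and `mexit` clears it, so any holder observes nil once the M is gone. Here is a rough, self-contained sketch of that contract using `sync/atomic` and invented `M`/`mWeak` types; the runtime's actual `mWeakPointer` is a separate internal implementation, so treat this purely as an illustration of the semantics.

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// M and mWeak are illustrative stand-ins, not the runtime's types.
type M struct {
	id   int64
	self *mWeak // cleared on exit so holders observe nil
}

// mWeak is a clearable handle: once cleared, every holder sees nil
// instead of a pointer that would keep the M alive or dangle after
// the M is freed.
type mWeak struct{ p atomic.Pointer[M] }

func newMWeak(mp *M) *mWeak {
	w := &mWeak{}
	w.p.Store(mp)
	return w
}

func (w *mWeak) get() *M { return w.p.Load() }
func (w *mWeak) clear()  { w.p.Store(nil) }

func main() {
	mp := &M{id: 7}
	mp.self = newMWeak(mp) // "mcommoninit": publish the handle
	oldm := mp.self        // e.g. a P stashing the handle for later

	mp.self.clear() // "mexit": the M is going away

	fmt.Println(oldm.get()) // <nil>: safe to check after exit
}
```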
```diff
@@ -4259,6 +4265,7 @@ func park_m(gp *g) {
 }
 
 func goschedImpl(gp *g, preempted bool) {
+	pp := gp.m.p.ptr()
 	trace := traceAcquire()
 	status := readgstatus(gp)
 	if status&^_Gscan != _Grunning {
@@ -4281,9 +4288,15 @@ func goschedImpl(gp *g, preempted bool) {
 	}
 
 	dropg()
-	lock(&sched.lock)
-	globrunqput(gp)
-	unlock(&sched.lock)
+	if preempted && sched.gcwaiting.Load() {
+		// If preempted for STW, keep the G on the local P in runnext
+		// so it can keep running immediately after the STW.
+		runqput(pp, gp, true)
+	} else {
+		lock(&sched.lock)
+		globrunqput(gp)
+		unlock(&sched.lock)
+	}
 
 	if mainStarted {
 		wakep()
```
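The special case above is easier to see in isolation. Below is a minimal, self-contained sketch of the queueing decision, with illustrative `G`/`P` types and an invented `goschedSketch` helper (none of this is runtime code): a G preempted for STW stays in its own P's `runnext` slot, so that P still counts as runnable and resumes the same G first after the world restarts, while any other yield goes to the global run queue as before, where any P may claim it.

```go
package main

import "fmt"

// G and P are illustrative stand-ins for runtime goroutines and Ps.
type G struct{ id int }

type P struct {
	runnext *G   // runs before anything else on this P
	runq    []*G // local run queue
}

var globrunq []*G // global run queue: any P may take from it

// goschedSketch mirrors the new special case in goschedImpl: a G
// preempted for STW stays on its own P in runnext; any other yield
// goes to the global queue as before.
func goschedSketch(pp *P, gp *G, preemptedForSTW bool) {
	if preemptedForSTW {
		// The P keeps a runnable G, so it will be treated as
		// runnable and handed back to an M after the STW.
		pp.runnext = gp
		return
	}
	globrunq = append(globrunq, gp)
}

func main() {
	pp := &P{}
	goschedSketch(pp, &G{id: 1}, true)
	goschedSketch(&P{}, &G{id: 2}, false)
	fmt.Println(pp.runnext.id, len(globrunq)) // 1 1
}
```

Passing `true` as the third argument to the real `runqput` is what requests the `runnext` slot, so the preempted G runs ahead of anything already queued on that P.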
```diff
@@ -6013,6 +6026,7 @@ func procresize(nprocs int32) *p {
 	}
 
 	var runnablePs *p
+	var runnablePsNeedM *p
 	for i := nprocs - 1; i >= 0; i-- {
 		pp := allp[i]
 		if gp.m.p.ptr() == pp {
@@ -6021,12 +6035,41 @@ func procresize(nprocs int32) *p {
 		pp.status = _Pidle
 		if runqempty(pp) {
 			pidleput(pp, now)
-		} else {
-			pp.m.set(mget())
-			pp.link.set(runnablePs)
-			runnablePs = pp
+			continue
 		}
+
+		// Prefer to run on the most recent M if it is
+		// available.
+		//
+		// Ps with no oldm (or for which oldm is already taken
+		// by an earlier P), we delay until all oldm Ps are
+		// handled. Otherwise, mget may return an M that a
+		// later P has in oldm.
+		var mp *m
+		if oldm := pp.oldm.get(); oldm != nil {
+			// Returns nil if oldm is not idle.
+			mp = mgetSpecific(oldm)
+		}
+		if mp == nil {
+			// Call mget later.
+			pp.link.set(runnablePsNeedM)
+			runnablePsNeedM = pp
+			continue
+		}
+		pp.m.set(mp)
+		pp.link.set(runnablePs)
+		runnablePs = pp
 	}
+	for runnablePsNeedM != nil {
+		pp := runnablePsNeedM
+		runnablePsNeedM = pp.link.ptr()
+
+		mp := mget()
+		pp.m.set(mp)
+		pp.link.set(runnablePs)
+		runnablePs = pp
+	}
+
 	stealOrder.reset(uint32(nprocs))
 	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
 	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
```
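The rewritten loop is a two-pass matching. Pass one hands each P its `oldm` when that M is idle; every P that cannot claim a preferred M is parked on `runnablePsNeedM`. Only then does pass two fall back to `mget`, so a generic grab can never take an M that a later P still prefers. A self-contained sketch of the pattern with simplified stand-in types (a map plays the idle list; none of these names are runtime APIs):

```go
package main

import "fmt"

// Illustrative stand-ins for runtime Ms and Ps.
type m struct{ id int }

type p struct {
	oldm *m // last M this P ran on, if any
	m    *m // assignment result
}

// assign mirrors the two-pass shape of the new procresize loop.
func assign(ps []*p, idle map[*m]bool) {
	var needAny []*p

	// Pass 1: claim preferred Ms only; defer everyone else so a
	// generic grab cannot take an M that a later P prefers.
	for _, pp := range ps {
		if pp.oldm != nil && idle[pp.oldm] {
			pp.m = pp.oldm
			delete(idle, pp.oldm)
			continue
		}
		needAny = append(needAny, pp)
	}

	// Pass 2: hand out whatever idle Ms remain.
	for _, pp := range needAny {
		for mp := range idle {
			pp.m = mp
			delete(idle, mp)
			break
		}
	}
}

func main() {
	m1, m2 := &m{id: 1}, &m{id: 2}
	p1 := &p{}         // no affinity: must wait for pass 2
	p2 := &p{oldm: m2} // prefers m2
	assign([]*p{p1, p2}, map[*m]bool{m1: true, m2: true})
	fmt.Println(p1.m.id, p2.m.id) // 1 2: p1 did not steal p2's M
}
```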
```diff
@@ -6064,6 +6107,11 @@ func acquirepNoTrace(pp *p) {
 
 	// Have p; write barriers now allowed.
 
+	// The M we're associating with will be the old M after the next
+	// releasep. We must set this here because write barriers are not
+	// allowed in releasep.
+	pp.oldm = pp.m.ptr().self
+
 	// Perform deferred mcache flush before this P can allocate
 	// from a potentially stale mcache.
 	pp.mcache.prepareForSweep()
```
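Note which side of the handoff records the hint: `acquirepNoTrace` writes `pp.oldm` when an M takes the P because, per the comment in the diff, `releasep` runs where write barriers are not allowed. A toy sketch of that lifecycle, with hypothetical `pp`/`mm` types standing in for the runtime fields:

```go
package main

import "fmt"

// Hypothetical stand-ins; the real fields live on runtime.p / runtime.m.
type mm struct{ id int }

type pp struct {
	m    *mm // current owner
	oldm *mm // affinity hint: the owner after the next release
}

// acquire records the hint up front, mirroring acquirepNoTrace: in the
// real runtime the pointer write cannot happen in releasep, where
// write barriers are forbidden.
func acquire(p *pp, m *mm) {
	p.oldm = m // will be the "old" M once released
	p.m = m
}

func release(p *pp) {
	p.m = nil // nothing but a clear; oldm was already set at acquire
}

func main() {
	p, m := &pp{}, &mm{id: 3}
	acquire(p, m)
	release(p)
	fmt.Println(p.oldm.id) // 3: restart paths can prefer this M
}
```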
```diff
@@ -6998,6 +7046,27 @@ func mget() *m {
 	return mp
 }
 
+// Try to get a specific m from midle list. Returns nil if it isn't on the
+// midle list.
+//
+// sched.lock must be held.
+// May run during STW, so write barriers are not allowed.
+//
+//go:nowritebarrierrec
+func mgetSpecific(mp *m) *m {
+	assertLockHeld(&sched.lock)
+
+	if mp.idleNode.prev == 0 && mp.idleNode.next == 0 {
+		// Not on the list.
+		return nil
+	}
+
+	sched.midle.remove(unsafe.Pointer(mp))
+	sched.nmidle--
+
+	return mp
+}
+
 // Put gp on the global runnable queue.
 // sched.lock must be held.
 // May run during STW, so write barriers are not allowed.
```
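`mgetSpecific` decides membership from the node's intrusive links (both zero means "not on `midle`") and then unlinks that exact M, where plain `mget` just pops the head. A standalone sketch of the same idea using an ordinary pointer-based list; the extra head comparison covers the single-element case in this toy and is not meant to match the internals of the runtime's `muintptr`-based list:

```go
package main

import "fmt"

// node and idleList are an ordinary pointer-based toy, not the
// runtime's intrusive midle list.
type node struct {
	id         int
	prev, next *node
}

type idleList struct{ head *node }

func (l *idleList) push(n *node) {
	n.next = l.head
	if l.head != nil {
		l.head.prev = n
	}
	l.head = n
}

// getSpecific mirrors mgetSpecific: zeroed links mean "not idle",
// and a hit unlinks that exact node rather than popping the head.
func (l *idleList) getSpecific(n *node) *node {
	if n.prev == nil && n.next == nil && l.head != n {
		return nil // not on the list
	}
	if n.prev != nil {
		n.prev.next = n.next
	} else {
		l.head = n.next
	}
	if n.next != nil {
		n.next.prev = n.prev
	}
	n.prev, n.next = nil, nil
	return n
}

func main() {
	l := &idleList{}
	a, b := &node{id: 1}, &node{id: 2}
	l.push(a)
	l.push(b)
	fmt.Println(l.getSpecific(a).id) // 1: unlinked mid-list
	fmt.Println(l.getSpecific(a))    // <nil>: no longer idle
}
```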
