
Commit d7a38ad

runtime: eliminate global span queue [green tea]
This change removes the locked global span queue and replaces the fixed-size local span queue with a variable-sized local span queue. The variable-sized local span queue grows as needed to accommodate local work. With no global span queue, GC workers balance work amongst themselves by stealing from each other.

The new variable-sized local span queues are inspired by the P-local deque underlying sync.Pool. Unlike the sync.Pool deque, however, both the owning P and stealing Ps take spans from the tail, making this incarnation a strict queue, not a deque. This is intentional: we want a queue-like order to encourage objects to accumulate on each span.

The variable-sized local span queues are crucial to mark termination, just like the global span queue was. To avoid hitting the ragged barrier too often, we must check whether any Ps have spans on their variable-sized local span queues. We maintain a per-P atomic bitmask (another pMask) that contains this state. We can also use this mask to speed up stealing by skipping Ps that don't have any local spans.

The variable-sized local span queues are slower than the old fixed-size local span queues because of the additional indirection, so this change also adds a non-atomic, fixed-size local queue. That risks work getting stuck on it, so, much as with workbufs, each worker occasionally dumps some spans onto its variable-sized local queue. This scales much better than dumping to a global queue, while still making the work visible to all other Ps.

For golang#73581.

Change-Id: I814f54d9c3cc7fa7896167746e9823f50943ac22
Reviewed-on: https://go-review.googlesource.com/c/go/+/700496
Reviewed-by: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 7bc1935 commit d7a38ad
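The queue discipline the commit message describes is easy to miss in the diffs below, so here is a minimal sketch of it. Everything in the sketch (the spanQueue, span, and stealHalf names, the mutex, the slice) is illustrative: the runtime's real structure is a growable, mostly lock-free ring (spanSPMC), not a mutex-guarded slice. The point is the ordering: the owner pushes at the head, while the owner and stealing Ps alike take from the tail, making this a strict FIFO queue rather than a sync.Pool-style deque.

package main

import (
	"fmt"
	"sync"
)

// span stands in for the runtime's *mspan; purely illustrative.
type span struct{ id int }

// spanQueue sketches the queue discipline only. The mutex and slice keep
// the sketch short; they are not how the runtime implements this.
type spanQueue struct {
	mu    sync.Mutex
	spans []*span // index 0 is the tail (oldest), len-1 the head (newest)
}

// push adds a span at the head. The queue grows as needed; there is no
// fixed capacity that overflows into a global queue.
func (q *spanQueue) push(s *span) {
	q.mu.Lock()
	defer q.mu.Unlock()
	q.spans = append(q.spans, s)
}

// pop takes the oldest span from the tail; owners and stealers both use it.
func (q *spanQueue) pop() *span {
	q.mu.Lock()
	defer q.mu.Unlock()
	if len(q.spans) == 0 {
		return nil
	}
	s := q.spans[0]
	q.spans = q.spans[1:]
	return s
}

// stealHalf moves roughly the oldest half of q into the stealer's queue,
// preserving FIFO order.
func (q *spanQueue) stealHalf(into *spanQueue) {
	q.mu.Lock()
	n := (len(q.spans) + 1) / 2
	batch := q.spans[:n]
	q.spans = q.spans[n:]
	q.mu.Unlock()

	into.mu.Lock()
	into.spans = append(into.spans, batch...)
	into.mu.Unlock()
}

func main() {
	var owner, thief spanQueue
	for i := 0; i < 4; i++ {
		owner.push(&span{id: i})
	}
	owner.stealHalf(&thief)                     // thief takes the two oldest spans
	fmt.Println(thief.pop().id, thief.pop().id) // 0 1
	fmt.Println(owner.pop().id, owner.pop().id) // 2 3
}

Draining oldest-first is deliberate: per the commit message, it gives objects time to accumulate on a span before any worker scans it.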

11 files changed: +546 −533 lines


src/runtime/export_test.go

Lines changed: 0 additions & 24 deletions
@@ -1289,30 +1289,6 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int {
 	return result
 }
 
-type MSpanQueue mSpanQueue
-
-func (q *MSpanQueue) Size() int {
-	return (*mSpanQueue)(q).n
-}
-
-func (q *MSpanQueue) Push(s *MSpan) {
-	(*mSpanQueue)(q).push((*mspan)(s))
-}
-
-func (q *MSpanQueue) Pop() *MSpan {
-	s := (*mSpanQueue)(q).pop()
-	return (*MSpan)(s)
-}
-
-func (q *MSpanQueue) TakeAll(p *MSpanQueue) {
-	(*mSpanQueue)(q).takeAll((*mSpanQueue)(p))
-}
-
-func (q *MSpanQueue) PopN(n int) MSpanQueue {
-	p := (*mSpanQueue)(q).popN(n)
-	return (MSpanQueue)(p)
-}
-
 const (
 	TimeHistSubBucketBits = timeHistSubBucketBits
 	TimeHistNumSubBuckets = timeHistNumSubBuckets

src/runtime/gc_test.go

Lines changed: 0 additions & 193 deletions
@@ -881,199 +881,6 @@ func TestWeakToStrongMarkTermination(t *testing.T) {
 	}
 }
 
-func TestMSpanQueue(t *testing.T) {
-	expectSize := func(t *testing.T, q *runtime.MSpanQueue, want int) {
-		t.Helper()
-		if got := q.Size(); got != want {
-			t.Errorf("expected size %d, got %d", want, got)
-		}
-	}
-	expectMSpan := func(t *testing.T, got, want *runtime.MSpan, op string) {
-		t.Helper()
-		if got != want {
-			t.Errorf("expected mspan %p from %s, got %p", want, op, got)
-		}
-	}
-	makeSpans := func(t *testing.T, n int) ([]*runtime.MSpan, func()) {
-		t.Helper()
-		spans := make([]*runtime.MSpan, 0, n)
-		for range cap(spans) {
-			spans = append(spans, runtime.AllocMSpan())
-		}
-		return spans, func() {
-			for i, s := range spans {
-				runtime.FreeMSpan(s)
-				spans[i] = nil
-			}
-		}
-	}
-	t.Run("Empty", func(t *testing.T) {
-		var q runtime.MSpanQueue
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPop", func(t *testing.T) {
-		s := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s)
-
-		var q runtime.MSpanQueue
-		q.Push(s)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPopPushPop", func(t *testing.T) {
-		s0 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s0)
-		s1 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s1)
-
-		var q runtime.MSpanQueue
-
-		// Push and pop s0.
-		q.Push(s0)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s0, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-
-		// Push and pop s1.
-		q.Push(s1)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s1, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPushPopPop", func(t *testing.T) {
-		s0 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s0)
-		s1 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s1)
-
-		var q runtime.MSpanQueue
-		q.Push(s0)
-		expectSize(t, &q, 1)
-		q.Push(s1)
-		expectSize(t, &q, 2)
-		expectMSpan(t, q.Pop(), s0, "pop")
-		expectMSpan(t, q.Pop(), s1, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("EmptyTakeAll", func(t *testing.T) {
-		var q runtime.MSpanQueue
-		var p runtime.MSpanQueue
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		p.TakeAll(&q)
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4TakeAll", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-
-		var p runtime.MSpanQueue
-		p.TakeAll(&q)
-		expectSize(t, &p, 4)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop3", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(3)
-		expectSize(t, &p, 3)
-		expectSize(t, &q, 1)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectMSpan(t, q.Pop(), spans[len(spans)-1], "pop")
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop0", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(0)
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 4)
-		for i := range q.Size() {
-			expectMSpan(t, q.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop4", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(4)
-		expectSize(t, &p, 4)
-		expectSize(t, &q, 0)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop5", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(5)
-		expectSize(t, &p, 4)
-		expectSize(t, &q, 0)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-}
-
 func TestDetectFinalizerAndCleanupLeaks(t *testing.T) {
 	got := runTestProg(t, "testprog", "DetectFinalizerAndCleanupLeaks", "GODEBUG=checkfinalizers=1")
 	sp := strings.SplitN(got, "detected possible issues with cleanups and/or finalizers", 2)

src/runtime/mgc.go

Lines changed: 24 additions & 19 deletions
@@ -326,7 +326,7 @@ type workType struct {
 	full  lfstack          // lock-free list of full blocks workbuf
 	_     cpu.CacheLinePad // prevents false-sharing between full and empty
 	empty lfstack          // lock-free list of empty blocks workbuf
-	_     cpu.CacheLinePad // prevents false-sharing between empty and nproc/nwait
+	_     cpu.CacheLinePad // prevents false-sharing between empty and wbufSpans
 
 	wbufSpans struct {
 		lock mutex
@@ -337,12 +337,24 @@ type workType struct {
 		// one of the workbuf lists.
 		busy mSpanList
 	}
-	_ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanq
+	_ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanWorkMask
 
-	// Global queue of spans to scan.
+	// spanqMask is a bitmap indicating which Ps have local work worth stealing.
+	// Set or cleared by the owning P, cleared by stealing Ps.
+	//
+	// spanqMask is like a proxy for a global queue. An important invariant is that
+	// forced flushing like gcw.dispose must set this bit on any P that has local
+	// span work.
+	spanqMask pMask
+	_         cpu.CacheLinePad // prevents false-sharing between spanqMask and everything else
+
+	// List of all spanSPMCs.
 	//
 	// Only used if goexperiment.GreenTeaGC.
-	spanq spanQueue
+	spanSPMCs struct {
+		lock mutex // no lock rank because it's a leaf lock (see mklockrank.go).
+		all  *spanSPMC
+	}
 
 	// Restore 64-bit alignment on 32-bit.
 	// _ uint32
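To make the spanqMask comment above concrete, here is a rough sketch of how a per-P atomic bitmask of this shape can be maintained and consulted. The helper names and the use of sync/atomic are illustrative only; the runtime's pMask is a plain []uint32 driven by runtime-internal atomics, and the real protocol follows the invariant in the comment (forced flushes such as gcw.dispose must set the owning P's bit).

package main

import (
	"fmt"
	"sync/atomic"
)

// pMask sketches a per-P atomic bitmask: one bit per P, packed into
// uint32 words. Illustrative stand-in, not the runtime's type.
type pMask []atomic.Uint32

func newPMask(nprocs int) pMask {
	return make(pMask, (nprocs+31)/32)
}

// set announces that P id has local span work (done by the owning P).
func (m pMask) set(id int) { m[id/32].Or(1 << (id % 32)) }

// clear retracts the announcement (owning P, or a stealer that drained it).
func (m pMask) clear(id int) { m[id/32].And(^uint32(1 << (id % 32))) }

// read reports whether P id claims to have local span work.
func (m pMask) read(id int) bool { return m[id/32].Load()&(1<<(id%32)) != 0 }

func main() {
	mask := newPMask(8)
	mask.set(3) // P 3 pushed spans onto its local queue

	// A stealer (or the mark-termination check) scans the mask instead of
	// probing every P's queue directly, skipping Ps with no local spans.
	for id := 0; id < 8; id++ {
		if mask.read(id) {
			fmt.Println("P", id, "has spans worth stealing")
		}
	}
	mask.clear(3) // queue drained
}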
@@ -711,8 +723,9 @@ func gcStart(trigger gcTrigger) {
 		traceRelease(trace)
 	}
 
-	// Check that all Ps have finished deferred mcache flushes.
+	// Check and setup per-P state.
 	for _, p := range allp {
+		// Check that all Ps have finished deferred mcache flushes.
 		if fg := p.mcache.flushGen.Load(); fg != mheap_.sweepgen {
 			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
 			throw("p mcache not flushed")
@@ -923,6 +936,7 @@ top:
 			// TODO(austin): Break up these workbufs to
 			// better distribute work.
 			pp.gcw.dispose()
+
 			// Collect the flushedWork flag.
 			if pp.gcw.flushedWork {
 				atomic.Xadd(&gcMarkDoneFlushed, 1)
@@ -1623,17 +1637,6 @@ func gcEndWork() (last bool) {
 	return incnwait == work.nproc && !gcMarkWorkAvailable()
 }
 
-// gcMarkWorkAvailable reports whether there's any non-local work available to do.
-func gcMarkWorkAvailable() bool {
-	if !work.full.empty() || !work.spanq.empty() {
-		return true // global work available
-	}
-	if work.markrootNext < work.markrootJobs {
-		return true // root scan work available
-	}
-	return false
-}
-
 // gcMark runs the mark (or, for concurrent GC, mark termination)
 // All gcWork caches must be empty.
 // STW is in effect at this point.
@@ -1644,8 +1647,8 @@ func gcMark(startTime int64) {
 	work.tstart = startTime
 
 	// Check that there's no marking work remaining.
-	if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() {
-		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n")
+	if work.full != 0 || work.markrootNext < work.markrootJobs {
+		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n")
 		panic("non-empty mark queue after concurrent mark")
 	}
 
@@ -1761,10 +1764,12 @@ func gcSweep(mode gcMode) bool {
 		// Sweep all spans eagerly.
 		for sweepone() != ^uintptr(0) {
 		}
-		// Free workbufs eagerly.
+		// Free workbufs and span rings eagerly.
 		prepareFreeWorkbufs()
 		for freeSomeWbufs(false) {
 		}
+		for freeSomeSpanSPMCs(false) {
+		}
 		// All "free" events for this mark/sweep cycle have
 		// now happened, so we can make this profile cycle
 		// available immediately.
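The freeSomeSpanSPMCs(false) loop added above appears to follow the same incremental-free pattern as freeSomeWbufs: free a bounded batch per call and report whether more remain, so sweep termination can drain everything eagerly while other callers can yield between batches. The sketch below illustrates that contract under those assumptions; the names, batch size, and types are invented for illustration (the function's body is not part of this diff).

package main

import "fmt"

// spanRing is a stand-in for the runtime's spanSPMC ring type.
type spanRing struct{ released bool }

// all stands in for the work.spanSPMCs list the diff above introduces.
var all []*spanRing

// freeSomeSpanRings mimics the freeSomeWbufs contract: release at most one
// batch per call and return true if it should be called again.
func freeSomeSpanRings() bool {
	const batchSize = 64
	n := min(batchSize, len(all))
	for _, r := range all[:n] {
		r.released = true // the runtime would return the ring's memory here
	}
	all = all[n:]
	return len(all) > 0
}

func main() {
	for i := 0; i < 150; i++ {
		all = append(all, &spanRing{})
	}
	// Drain eagerly, as gcSweep does with freeSomeSpanSPMCs(false).
	for freeSomeSpanRings() {
	}
	fmt.Println("rings remaining:", len(all)) // rings remaining: 0
}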
