runtime: split gcMarkWorkAvailable into two separate conditions

mknyszek · gopherbot · commit fde10c4ce7f3 · 2025-09-23T09:18:03.000-07:00
Right now, gcMarkWorkAvailable is used in two scenarios. The first is critical: we use it to determine whether we're approaching mark termination, and it's crucial to reaching a fixed point across the ragged barrier in gcMarkDone. The second is a heuristic: should we spin up another GC worker? This change splits gcMarkWorkAvailable into these two separate conditions. This change also deduplicates the logic for updating work.nwait into more abstract helpers "gcBeginWork" and "gcEndWork." This change is solely refactoring, and should be a no-op. There are only two functional changes: - work.nwait is incremented after setting pp.gcMarkWorkerMode in the background worker code. I don't believe this change is observable except if the code fails to update work.nwait (either it results in a non-sensical number, or the stack is corrupted) in which case the goroutine may not be labeled as a mark worker in the resulting stack trace (it should be obvious from the stack frames though). - endCheckmarks also checks work.nwait == work.nproc, which should be fine since we never mutate work.nwait on that path. That extra check should be a no-op. Splitting these two use-cases for gcMarkWorkAvailable is conceptually helpful, and the checks may also diverge from Green Tea once we get rid of the global span queue. Change-Id: I0bec244a14ee82919c4deb7c1575589c0dca1089 Reviewed-on: https://go-review.googlesource.com/c/go/+/701176 Reviewed-by: Michael Pratt <mpratt@google.com> Auto-Submit: Michael Knyszek <mknyszek@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/src/runtime/mcheckmark.go b/src/runtime/mcheckmark.go
@@ -68,7 +68,7 @@ func startCheckmarks() {
 
 // endCheckmarks ends the checkmarks phase.
 func endCheckmarks() {
-	if gcMarkWorkAvailable(nil) {
+	if !gcIsMarkDone() {
 		throw("GC work not flushed")
 	}
 	useCheckmark = false
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
@@ -869,10 +869,11 @@ var gcDebugMarkDone struct {
 // all local work to the global queues where it can be discovered by
 // other workers.
 //
+// All goroutines performing GC work must call gcBeginWork to signal
+// that they're executing GC work. They must call gcEndWork when done.
 // This should be called when all local mark work has been drained and
-// there are no remaining workers. Specifically, when
-//
-//	work.nwait == work.nproc && !gcMarkWorkAvailable(p)
+// there are no remaining workers. Specifically, when gcEndWork returns
+// true.
 //
 // The calling context must be preemptible.
 //
@@ -896,7 +897,7 @@ top:
 	// empty before performing the ragged barrier. Otherwise,
 	// there could be global work that a P could take after the P
 	// has passed the ragged barrier.
-	if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
+	if !(gcphase == _GCmark && gcIsMarkDone()) {
 		semrelease(&work.markDoneSema)
 		return
 	}
@@ -1514,11 +1515,7 @@ func gcBgMarkWorker(ready chan struct{}) {
 			trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime)
 		}
 
-		decnwait := atomic.Xadd(&work.nwait, -1)
-		if decnwait == work.nproc {
-			println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
-			throw("work.nwait was > work.nproc")
-		}
+		gcBeginWork()
 
 		systemstack(func() {
 			// Mark our goroutine preemptible so its stack can be scanned or observed
@@ -1570,15 +1567,6 @@ func gcBgMarkWorker(ready chan struct{}) {
 			atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
 		}
 
-		// Was this the last worker and did we run out
-		// of work?
-		incnwait := atomic.Xadd(&work.nwait, +1)
-		if incnwait > work.nproc {
-			println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode,
-				"work.nwait=", incnwait, "work.nproc=", work.nproc)
-			throw("work.nwait > work.nproc")
-		}
-
 		// We'll releasem after this point and thus this P may run
 		// something else. We must clear the worker mode to avoid
 		// attributing the mode to a different (non-worker) G in
@@ -1587,7 +1575,7 @@ func gcBgMarkWorker(ready chan struct{}) {
 
 		// If this worker reached a background mark completion
 		// point, signal the main GC goroutine.
-		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
+		if gcEndWork() {
 			// We don't need the P-local buffers here, allow
 			// preemption because we may schedule like a regular
 			// goroutine in gcMarkDone (block on locks, etc).
@@ -1599,13 +1587,44 @@ func gcBgMarkWorker(ready chan struct{}) {
 	}
 }
 
-// gcMarkWorkAvailable reports whether executing a mark worker
-// on p is potentially useful. p may be nil, in which case it only
-// checks the global sources of work.
-func gcMarkWorkAvailable(p *p) bool {
+// gcShouldScheduleWorker reports whether executing a mark worker
+// on p is potentially useful. p may be nil.
+func gcShouldScheduleWorker(p *p) bool {
 	if p != nil && !p.gcw.empty() {
 		return true
 	}
+	return gcMarkWorkAvailable()
+}
+
+// gcIsMarkDone reports whether the mark phase is (probably) done.
+func gcIsMarkDone() bool {
+	return work.nwait == work.nproc && !gcMarkWorkAvailable()
+}
+
+// gcBeginWork signals to the garbage collector that a new worker is
+// about to process GC work.
+func gcBeginWork() {
+	decnwait := atomic.Xadd(&work.nwait, -1)
+	if decnwait == work.nproc {
+		println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
+		throw("work.nwait was > work.nproc")
+	}
+}
+
+// gcEndWork signals to the garbage collector that a new worker has just finished
+// its work. It reports whether it was the last worker and there's no more work
+// to do. If it returns true, the caller must call gcMarkDone.
+func gcEndWork() (last bool) {
+	incnwait := atomic.Xadd(&work.nwait, +1)
+	if incnwait > work.nproc {
+		println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
+		throw("work.nwait > work.nproc")
+	}
+	return incnwait == work.nproc && !gcMarkWorkAvailable()
+}
+
+// gcMarkWorkAvailable reports whether there's any non-local work available to do.
+func gcMarkWorkAvailable() bool {
 	if !work.full.empty() || !work.spanq.empty() {
 		return true // global work available
 	}
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
@@ -675,11 +675,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
 	startTime := nanotime()
 	trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
 
-	decnwait := atomic.Xadd(&work.nwait, -1)
-	if decnwait == work.nproc {
-		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
-		throw("nwait > work.nprocs")
-	}
+	gcBeginWork()
 
 	// gcDrainN requires the caller to be preemptible.
 	casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking)
@@ -702,14 +698,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
 
 	// If this is the last worker and we ran out of work,
 	// signal a completion point.
-	incnwait := atomic.Xadd(&work.nwait, +1)
-	if incnwait > work.nproc {
-		println("runtime: work.nwait=", incnwait,
-			"work.nproc=", work.nproc)
-		throw("work.nwait > work.nproc")
-	}
-
-	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
+	if gcEndWork() {
 		// This has reached a background completion point. Set
 		// gp.param to a non-nil value to indicate this. It
 		// doesn't matter what we set it to (it just has to be
diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go
@@ -767,8 +767,8 @@ func (c *gcControllerState) findRunnableGCWorker(pp *p, now int64) (*g, int64) {
 		gcCPULimiter.update(now)
 	}
 
-	if !gcMarkWorkAvailable(pp) {
-		// No work to be done right now. This can happen at
+	if !gcShouldScheduleWorker(pp) {
+		// No good reason to schedule a worker. This can happen at
 		// the end of the mark phase when there are still
 		// assists tapering off. Don't bother running a worker
 		// now because it'll just return immediately.
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
@@ -3125,7 +3125,7 @@ func handoffp(pp *p) {
 		return
 	}
 	// if it has GC work, start it straight away
-	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {
+	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) {
 		startm(pp, false, false)
 		return
 	}
@@ -3506,7 +3506,7 @@ top:
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
 	// and have work to do, run idle-time marking rather than give up the P.
-	if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() {
+	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() {
 		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
 		if node != nil {
 			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
@@ -3913,7 +3913,7 @@ func checkIdleGCNoP() (*p, *g) {
 	if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() {
 		return nil, nil
 	}
-	if !gcMarkWorkAvailable(nil) {
+	if !gcShouldScheduleWorker(nil) {
 		return nil, nil
 	}
 

Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ func startCheckmarks() {`
`68`	`68`
`69`	`69`	`// endCheckmarks ends the checkmarks phase.`
`70`	`70`	`func endCheckmarks() {`
`71`		`- if gcMarkWorkAvailable(nil) {`
	`71`	`+ if !gcIsMarkDone() {`
`72`	`72`	`throw("GC work not flushed")`
`73`	`73`	`}`
`74`	`74`	`useCheckmark = false`
Original file line number	Diff line number	Diff line change
`@@ -3125,7 +3125,7 @@ func handoffp(pp *p) {`
`3125`	`3125`	`return`
`3126`	`3126`	`}`
`3127`	`3127`	`// if it has GC work, start it straight away`
`3128`		`- if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) {`
	`3128`	`+ if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) {`
`3129`	`3129`	`startm(pp, false, false)`
`3130`	`3130`	`return`
`3131`	`3131`	`}`
`@@ -3506,7 +3506,7 @@ top:`
`3506`	`3506`	`//`
`3507`	`3507`	`// If we're in the GC mark phase, can safely scan and blacken objects,`
`3508`	`3508`	`// and have work to do, run idle-time marking rather than give up the P.`
`3509`		`- if gcBlackenEnabled != 0 && gcMarkWorkAvailable(pp) && gcController.addIdleMarkWorker() {`
	`3509`	`+ if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() {`
`3510`	`3510`	`node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())`
`3511`	`3511`	`if node != nil {`
`3512`	`3512`	`pp.gcMarkWorkerMode = gcMarkWorkerIdleMode`
`@@ -3913,7 +3913,7 @@ func checkIdleGCNoP() (p, g) {`
`3913`	`3913`	`if atomic.Load(&gcBlackenEnabled) == 0 \|\| !gcController.needIdleMarkWorker() {`
`3914`	`3914`	`return nil, nil`
`3915`	`3915`	`}`
`3916`		`- if !gcMarkWorkAvailable(nil) {`
	`3916`	`+ if !gcShouldScheduleWorker(nil) {`
`3917`	`3917`	`return nil, nil`
`3918`	`3918`	`}`
`3919`	`3919`