Skip to content

Commit 13df972

Browse files
mknyszek authored and gopherbot committed
runtime/metrics: add metrics for goroutine sched states
This is largely a port of CL 38180. For golang#15490. Change-Id: I2726111e472e81e9f9f0f294df97872c2689f061 Reviewed-on: https://go-review.googlesource.com/c/go/+/690397 Reviewed-by: Michael Pratt <[email protected]> Auto-Submit: Michael Knyszek <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent bd07faf commit 13df972

File tree

10 files changed

+391
-8
lines changed

10 files changed

+391
-8
lines changed

src/runtime/debug.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ func totalMutexWaitTimeNanos() int64 {
177177

178178
// NumGoroutine returns the number of goroutines that currently exist.
179179
func NumGoroutine() int {
180-
return int(gcount())
180+
return int(gcount(false))
181181
}
182182

183183
//go:linkname debug_modinfo runtime/debug.modinfo

src/runtime/metrics.go

Lines changed: 110 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ package runtime
88

99
import (
1010
"internal/godebugs"
11+
"internal/runtime/atomic"
1112
"internal/runtime/gc"
1213
"unsafe"
1314
)
@@ -465,9 +466,38 @@ func initMetrics() {
465466
},
466467
},
467468
"/sched/goroutines:goroutines": {
468-
compute: func(_ *statAggregate, out *metricValue) {
469+
deps: makeStatDepSet(schedStatsDep),
470+
compute: func(in *statAggregate, out *metricValue) {
471+
out.kind = metricKindUint64
472+
out.scalar = uint64(in.schedStats.gTotal)
473+
},
474+
},
475+
"/sched/goroutines/not-in-go:goroutines": {
476+
deps: makeStatDepSet(schedStatsDep),
477+
compute: func(in *statAggregate, out *metricValue) {
478+
out.kind = metricKindUint64
479+
out.scalar = uint64(in.schedStats.gNonGo)
480+
},
481+
},
482+
"/sched/goroutines/running:goroutines": {
483+
deps: makeStatDepSet(schedStatsDep),
484+
compute: func(in *statAggregate, out *metricValue) {
485+
out.kind = metricKindUint64
486+
out.scalar = uint64(in.schedStats.gRunning)
487+
},
488+
},
489+
"/sched/goroutines/runnable:goroutines": {
490+
deps: makeStatDepSet(schedStatsDep),
491+
compute: func(in *statAggregate, out *metricValue) {
492+
out.kind = metricKindUint64
493+
out.scalar = uint64(in.schedStats.gRunnable)
494+
},
495+
},
496+
"/sched/goroutines/waiting:goroutines": {
497+
deps: makeStatDepSet(schedStatsDep),
498+
compute: func(in *statAggregate, out *metricValue) {
469499
out.kind = metricKindUint64
470-
out.scalar = uint64(gcount())
500+
out.scalar = uint64(in.schedStats.gWaiting)
471501
},
472502
},
473503
"/sched/latencies:seconds": {
@@ -547,6 +577,7 @@ const (
547577
cpuStatsDep // corresponds to cpuStatsAggregate
548578
gcStatsDep // corresponds to gcStatsAggregate
549579
finalStatsDep // corresponds to finalStatsAggregate
580+
schedStatsDep // corresponds to schedStatsAggregate
550581
numStatsDeps
551582
)
552583

@@ -740,6 +771,80 @@ func (a *finalStatsAggregate) compute() {
740771
a.cleanupsQueued, a.cleanupsExecuted = gcCleanups.readQueueStats()
741772
}
742773

774+
// schedStatsAggregate contains stats about the scheduler, including
775+
// an approximate count of goroutines in each state.
776+
type schedStatsAggregate struct {
777+
gTotal uint64
778+
gRunning uint64
779+
gRunnable uint64
780+
gNonGo uint64
781+
gWaiting uint64
782+
}
783+
784+
// compute populates the schedStatsAggregate with values from the runtime.
785+
func (a *schedStatsAggregate) compute() {
786+
// Lock the scheduler so the global run queue can't change and
787+
// the number of Ps can't change. This doesn't prevent the
788+
// local run queues from changing, so the results are still
789+
// approximate.
790+
lock(&sched.lock)
791+
792+
// Collect running/runnable from per-P run queues.
793+
for _, p := range allp {
794+
if p == nil || p.status == _Pdead {
795+
break
796+
}
797+
switch p.status {
798+
case _Prunning:
799+
a.gRunning++
800+
case _Psyscall:
801+
a.gNonGo++
802+
case _Pgcstop:
803+
// The world is stopping or stopped.
804+
// This is fine. The results will be
805+
// slightly odd since nothing else
806+
// is running, but it will be accurate.
807+
}
808+
809+
for {
810+
h := atomic.Load(&p.runqhead)
811+
t := atomic.Load(&p.runqtail)
812+
next := atomic.Loaduintptr((*uintptr)(&p.runnext))
813+
runnable := int32(t - h)
814+
if atomic.Load(&p.runqhead) != h || runnable < 0 {
815+
continue
816+
}
817+
if next != 0 {
818+
runnable++
819+
}
820+
a.gRunnable += uint64(runnable)
821+
break
822+
}
823+
}
824+
825+
// Global run queue.
826+
a.gRunnable += uint64(sched.runq.size)
827+
828+
// Account for Gs that are in _Gsyscall without a P in _Psyscall.
829+
nGsyscallNoP := sched.nGsyscallNoP.Load()
830+
831+
// nGsyscallNoP can go negative during temporary races.
832+
if nGsyscallNoP >= 0 {
833+
a.gNonGo += uint64(nGsyscallNoP)
834+
}
835+
836+
// Compute the number of blocked goroutines. We have to
837+
// include system goroutines in this count because we included
838+
// them above.
839+
a.gTotal = uint64(gcount(true))
840+
a.gWaiting = a.gTotal - (a.gRunning + a.gRunnable + a.gNonGo)
841+
if a.gWaiting < 0 {
842+
a.gWaiting = 0
843+
}
844+
845+
unlock(&sched.lock)
846+
}
847+
743848
// nsToSec takes a duration in nanoseconds and converts it to seconds as
744849
// a float64.
745850
func nsToSec(ns int64) float64 {
@@ -758,6 +863,7 @@ type statAggregate struct {
758863
cpuStats cpuStatsAggregate
759864
gcStats gcStatsAggregate
760865
finalStats finalStatsAggregate
866+
schedStats schedStatsAggregate
761867
}
762868

763869
// ensure populates statistics aggregates determined by deps if they
@@ -782,6 +888,8 @@ func (a *statAggregate) ensure(deps *statDepSet) {
782888
a.gcStats.compute()
783889
case finalStatsDep:
784890
a.finalStats.compute()
891+
case schedStatsDep:
892+
a.schedStats.compute()
785893
}
786894
}
787895
a.ensured = a.ensured.union(missing)

src/runtime/metrics/description.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,26 @@ var allDesc = []Description{
437437
Description: "The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously.",
438438
Kind: KindUint64,
439439
},
440+
{
441+
Name: "/sched/goroutines/not-in-go:goroutines",
442+
Description: "Approximate count of goroutines running or blocked in a system call or cgo call. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
443+
Kind: KindUint64,
444+
},
445+
{
446+
Name: "/sched/goroutines/runnable:goroutines",
447+
Description: "Approximate count of goroutines ready to execute, but not executing. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
448+
Kind: KindUint64,
449+
},
450+
{
451+
Name: "/sched/goroutines/running:goroutines",
452+
Description: "Approximate count of goroutines executing. Always less than or equal to /sched/gomaxprocs:threads. Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
453+
Kind: KindUint64,
454+
},
455+
{
456+
Name: "/sched/goroutines/waiting:goroutines",
457+
Description: "Approximate count of goroutines waiting on a resource (I/O or sync primitives). Not guaranteed to add up to /sched/goroutines:goroutines with other goroutine metrics.",
458+
Kind: KindUint64,
459+
},
440460
{
441461
Name: "/sched/goroutines:goroutines",
442462
Description: "Count of live goroutines.",

src/runtime/metrics/doc.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,26 @@ Below is the full list of supported metrics, ordered lexicographically.
509509
operating system threads that can execute user-level Go code
510510
simultaneously.
511511
512+
/sched/goroutines/not-in-go:goroutines
513+
Approximate count of goroutines running or blocked in
514+
a system call or cgo call. Not guaranteed to add up to
515+
/sched/goroutines:goroutines with other goroutine metrics.
516+
517+
/sched/goroutines/runnable:goroutines
518+
Approximate count of goroutines ready to execute,
519+
but not executing. Not guaranteed to add up to
520+
/sched/goroutines:goroutines with other goroutine metrics.
521+
522+
/sched/goroutines/running:goroutines
523+
Approximate count of goroutines executing. Always less than or
524+
equal to /sched/gomaxprocs:threads. Not guaranteed to add up to
525+
/sched/goroutines:goroutines with other goroutine metrics.
526+
527+
/sched/goroutines/waiting:goroutines
528+
Approximate count of goroutines waiting on a resource
529+
(I/O or sync primitives). Not guaranteed to add up to
530+
/sched/goroutines:goroutines with other goroutine metrics.
531+
512532
/sched/goroutines:goroutines
513533
Count of live goroutines.
514534

0 commit comments

Comments
 (0)