@@ -353,6 +353,190 @@ func Gosched() {
 	mcall(gosched_m)
 }
 
+// Yield cooperatively yields if, and only if, the scheduler is "busy".
+//
+// This can be called by any work wishing to utilize strictly spare capacity
+// while minimizing the degree to which it delays other work from being promptly
+// scheduled.
+//
+// Yield is intended to have very low overhead, particularly in its no-op case
+// where there is idle capacity in the scheduler and the caller does not need to
+// yield. This should allow it to be called often, such as in the body of tight
+// loops, by any task wishing to yield promptly to any waiting work.
+//
+// When there is waiting work, the yielding goroutine may briefly be rescheduled
+// after it, or may, in some cases, be parked in a waiting 'yield' state until
+// the scheduler next has spare capacity to resume it. Yield does not guarantee
+// fairness or starvation-prevention: once a goroutine yields, it may remain
+// parked until the scheduler next has idle capacity. This means Yield can block
+// for unbounded durations in the presence of sustained over-saturation; callers
+// are responsible for deciding where to call Yield to avoid priority inversions.
+//
+// Yield will never park if the calling goroutine is locked to an OS thread.
+func Yield() {
+	// Common/fast case: do nothing if npidle is non-zero, meaning there is
+	// an idle P and thus no reason to yield this one. Doing only this check here
+	// keeps Yield inlineable (~70 of the 80 inline budget as of writing).
+	if sched.npidle.Load() == 0 {
+		maybeYield()
+	}
+}
+
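For a sense of the intended call pattern, here is a hedged usage sketch (not part of this diff): it assumes the function above ends up reachable as runtime.Yield, and the surrounding names (processAll, the work slice) are made up for illustration.

```go
package main

import "runtime"

// processAll is a hypothetical batch task that should only soak up spare
// capacity. Yield is intended to be cheap enough to call on every iteration:
// it is a no-op while some P is idle, and otherwise briefly defers to (or
// parks behind) whatever runnable work is waiting.
func processAll(work []func()) {
	for _, fn := range work {
		fn()
		runtime.Yield() // assumes this patch; not present in released Go
	}
}

func main() {
	processAll([]func(){func() {}, func() {}})
}
```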
+// maybeYield is called by Yield if npidle is zero, meaning there are no idle Ps
+// and thus there may be work to which the caller should yield. Such work could
+// be on the local runq of the caller's P, on the global runq, in the runq of
+// some other P, or even in the form of ready conns waiting to be noticed by a
+// netpoll which would then ready runnable goroutines.
+//
+// Keeping this function extremely cheap is essential: it must be cheap enough
+// that callers can call it in very tight loops, as very frequent calls ensure a
+// task wishing to yield when work is waiting will do so promptly. Checking the
+// runq of every P or calling netpoll is too expensive to do in every call, so,
+// since the intent is to bound how long work may wait, such checks only need to
+// be performed after some amount of time has elapsed (e.g. 0.25ms). To minimize
+// overhead when called at a higher frequency, this elapsed time is checked with
+// an exponential backoff.
+//
+// runqs are checked directly with non-atomic reads rather than runqempty: being
+// cheap is our top priority and a microsecond of staleness is fine as long as
+// the check does not get optimized out of a calling loop body (hence noinline).
+//
+//go:noinline
+func maybeYield() {
+	gp := getg()
+
+	// Don't park while locked to an OS thread.
+	if gp.lockedm != 0 {
+		return
+	}
+
+	// If the local P's runq ring buffer/runnext is non-empty, yield to the waiting G.
+	if p := gp.m.p.ptr(); p.runqhead != p.runqtail || p.runnext != 0 {
+		// If there is work in the local P's runq, we can yield by just going to the
+		// back of the local P's runq via goyield: this achieves the same goal of
+		// letting waiting work run instead of us, but without parking on the global
+		// yieldq and potentially switching Ps. While that's our preferred choice,
+		// we want to avoid thrashing back and forth between multiple Yield-calling
+		// goroutines: in such a case it is better to just park one so the other
+		// stops seeing it in the queue and yielding to it. To detect and break this
+		// cycle, we put a 1 in the yieldchecks field: if the other goroutine yields
+		// right back, but is then still in this runq bringing us here again, we'll
+		// see this 1 and park instead. We can clobber yieldchecks here since we're
+		// actively yielding -- we don't need the counter to decide to do so. And
+		// our sentinel will in turn be clobbered the very next time the time is put
+		// in the upper bits, which happens when they're zero if we don't yield,
+		// so this sentinel should be relatively reliable in indicating thrashing.
+		if gp.yieldchecks == 1 {
+			yieldPark()
+			return
+		}
+		gp.yieldchecks = 1
+		// Go to the back of the local runq.
+		goyield()
+		return
+	}
+
+	// If the global runq is non-empty, park in the global yieldq right away: that
+	// is work someone needs to pick up and it might as well be our P. We could,
+	// potentially, directly claim it here and goyield (or equivalent) to try to
+	// remain on this P, but just parking and letting this P go to findRunnable
+	// avoids duplicating its logic and seems good enough.
+	if !sched.runq.empty() {
+		yieldPark()
+		return
+	}
+
+	// We didn't find anything via cheap O(1) checks of our runq or global runq, but
+	// it is possible there are goroutines waiting in runqs of other Ps that are
+	// not being stolen by an idle P -- the lack of idle Ps (npidle=0) is what got
+	// us here. Furthermore, given the lack of idle Ps, it is also possible that
+	// ready conns are waiting for a netpoll to notice them and ready their
+	// goroutines, i.e. work to which we should then yield. However, searching all
+	// runqs, and even more so netpoll, is too expensive for every maybeYield
+	// call: being extremely low overhead is essential to allowing Yield() to be
+	// called at a high enough frequency to make the caller respond to changing load
+	// promptly.
+	//
+	// Given that our main goal here is to reduce/bound *how long* work waits, we can
+	// do the more extensive/expensive checks searching all runqs / netpoll less often,
+	// but we still need to do them often "enough". Since the goal is to bound the
+	// time that work may wait before a call to Yield detects it, the time elapsed
+	// since the last check would be a good signal, but even checking nanotime()
+	// on each call to measure this would be too expensive. Instead, we check
+	// nanotime() with an exponential backoff using a simple counter, to ensure we
+	// avoid overly frequent time checks under higher call frequencies while still
+	// checking the time often at lower frequencies.
+	//
+	// To implement such a time-based cap with elapsed time checked on a subset of
+	// calls, we combine a call count and an elapsed-time indicator in a single
+	// uint32 on the G: its 11 lower bits store a counter while the remaining 21 bits
+	// store nanos quantized to 0.25ms "epochs" by discarding the lower 18 bits
+	// of an int64 nanotime() value. When the counter value after increment is 2^k-1,
+	// we check whether the time -- quantized to 0.25ms -- has changed and, if so,
+	// move on to the more thorough check for waiting work.
+	//
+	// Choosing 11 bits for the counter allows backing off to a rate of checking the
+	// clock once every ~1k calls if called extremely frequently; it seems unlikely
+	// a caller would be able to call this at a frequency high enough to want a
+	// higher backoff. The remaining 21 bits allow ~9 minutes between rollovers of
+	// the epoch: the slim chance of a false negative is quite acceptable, as if we
+	// hit it, we just delay one check of the runqs by a quarter millisecond.
+	const yieldCountBits, yieldCountMask = 11, (1 << 11) - 1
+	const yieldEpochShift = 18 - yieldCountBits // only need to shift by the difference, then mask.
+	gp.yieldchecks++
+	// Exponential backoff: only consult the clock when the count is 2^k-1.
+	if count := gp.yieldchecks & yieldCountMask; (count & (count + 1)) == 0 {
+		prev := gp.yieldchecks &^ yieldCountMask
+		now := uint32(nanotime()>>yieldEpochShift) &^ yieldCountMask
+		if now != prev {
+			// Set yieldchecks to just the new high timestamp bits, clearing the counter.
+			gp.yieldchecks = now
+
+			// Check the runqs of all Ps; if we find anything, park, freeing this P to steal it.
+			for i := range allp {
+				// We don't need the extra accuracy (and cost) of runqempty here either;
+				// worst-case we'll yield a check later or maybe park and unpark.
+				if allp[i].runqhead != allp[i].runqtail || allp[i].runnext != 0 {
+					yieldPark()
+					return
+				}
+			}
+
+			// Check netpoll; a ready conn is basically a runnable goroutine which we
+			// would yield to if we saw it, but the lack of idle Ps may mean nobody is
+			// checking this as often right now and there may be ready conns waiting.
+			if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
+				var found bool
+				systemstack(func() {
+					if list, delta := netpoll(0); !list.empty() {
+						injectglist(&list)
+						netpollAdjustWaiters(delta)
+						found = true
+					}
+				})
+				if found {
+					goyield()
+				}
+			}
+		} else if count == yieldCountMask {
+			// Counter overflowed before the epoch changed; reset it halfway back.
+			gp.yieldchecks = prev | (yieldCountMask / 2)
+		}
+	}
+}
+
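The packed counter/epoch scheme above is compact but subtle, so here is a standalone sketch (not runtime code; the constants are re-declared locally and the printout is purely illustrative) showing when the clock actually gets consulted and how two instants roughly 0.25ms apart land in different epochs.

```go
package main

import "fmt"

// Mirrors the constants declared inside maybeYield: the low 11 bits count
// calls, the upper 21 bits hold nanotime()>>18, i.e. ~0.25ms epochs, stored
// pre-shifted so only the low bits need masking.
const (
	yieldCountBits  = 11
	yieldCountMask  = (1 << yieldCountBits) - 1
	yieldEpochShift = 18 - yieldCountBits
)

// pack quantizes a nanotime value to an epoch as it would be stored in the
// upper 21 bits of the uint32: equivalent to (nanos>>18)<<11.
func pack(nanos int64) uint32 { return uint32(nanos>>yieldEpochShift) &^ yieldCountMask }

func main() {
	// The clock is consulted only when the post-increment count is 2^k-1:
	// calls 1, 3, 7, 15, ..., 2047, i.e. exponential backoff toward roughly
	// one clock read per 1k calls. (The real code resets or re-bases the
	// counter before it can wrap into the epoch bits.)
	var checks uint32
	for call := 1; call <= yieldCountMask; call++ {
		checks++
		if c := checks & yieldCountMask; c&(c+1) == 0 {
			fmt.Println("would read nanotime() at call", call)
		}
	}
	// Two instants one epoch apart (2^18ns ~= 0.26ms) quantize differently.
	fmt.Println(pack(0) != pack(1<<18)) // true
}
```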
+// yieldPark parks the current goroutine in a waiting state with reason yield
+// and puts it on the yieldq for findRunnable. A goroutine that has to
+// park to Yield is considered "waiting" rather than "runnable" as it is blocked
+// in this state until there is strictly spare execution capacity available to
+// resume it, unlike runnable goroutines which generally take turns running at
+// regular intervals. A parked yielded goroutine is more like a goroutine blocked
+// on a cond var or lock that will be signaled when we next detect spare capacity.
+func yieldPark() {
+	checkTimeouts()
+	gopark(yield_put, nil, waitReasonYield, traceBlockPreempted, 1)
+}
+
 // goschedguarded yields the processor like gosched, but also checks
 // for forbidden states and opts out of the yield in those cases.
 //
@@ -3445,6 +3629,23 @@ top:
 		}
 	}
 
+	// Nothing runnable, so check for yielded goroutines parked in the yieldq.
+	if !sched.yieldq.empty() {
+		lock(&sched.lock)
+		bg := sched.yieldq.pop()
+		unlock(&sched.lock)
+		if bg != nil {
+			trace := traceAcquire()
+			casgstatus(bg, _Gwaiting, _Grunnable)
+			if trace.ok() {
+				// Match other ready paths for trace visibility.
+				trace.GoUnpark(bg, 0)
+				traceRelease(trace)
+			}
+			return bg, false, false
+		}
+	}
+
 	// We have nothing to do.
 	//
 	// If we're in the GC mark phase, can safely scan and blacken objects,
@@ -3509,6 +3710,12 @@ top:
 		unlock(&sched.lock)
 		return gp, false, false
 	}
+
+	// Re-check the yieldq, this time while holding sched.lock.
+	if !sched.yieldq.empty() {
+		unlock(&sched.lock)
+		goto top
+	}
 	if !mp.spinning && sched.needspinning.Load() == 1 {
 		// See "Delicate dance" comment below.
 		mp.becomeSpinning()
@@ -7111,6 +7318,20 @@ func (q *gQueue) popList() gList {
 	return stack
 }
 
+// yield_put is the gopark unlock function for Yield. It enqueues the goroutine
+// onto the global yield queue. Returning true keeps the G parked until another
+// part of the scheduler makes it runnable again. The G remains in _Gwaiting after
+// this returns. Nothing else will find/ready this G in the interim, since it
+// isn't on a runq; it becomes findable only via the yieldq, which findRunnable checks.
+//
+//go:nosplit
+func yield_put(gp *g, _ unsafe.Pointer) bool {
+	lock(&sched.lock)
+	sched.yieldq.pushBack(gp)
+	unlock(&sched.lock)
+	return true
+}
+
 // A gList is a list of Gs linked through g.schedlink. A G can only be
 // on one gQueue or gList at a time.
 type gList struct {