Skip to content

Commit bca3e98

Browse files
jsmmatloob
authored andcommitted
cmd/go: test barrier actions
Add a barrier action between test run action and it's dependencies. Run will depend on this barrier action, and the barrier action will depend on: 1. The run action's dependencies 2. The previous barrier action This will force internal/work to schedule test run actions in-order, preventing potential goroutine starvation from the channel locking mechanism created to force test run actions to start in-order. Fixes golang#73106 golang#61233 Change-Id: I72e9f752f7521093f3c875eef7f2f29b2393fce9 Reviewed-on: https://go-review.googlesource.com/c/go/+/668035 LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Michael Matloob <[email protected]> Reviewed-by: David Chase <[email protected]> Reviewed-by: Michael Matloob <[email protected]>
1 parent 052fcde commit bca3e98

File tree

5 files changed

+71
-10
lines changed

5 files changed

+71
-10
lines changed

src/cmd/go/internal/test/test.go

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,11 +1044,36 @@ func runTest(ctx context.Context, cmd *base.Command, args []string) {
10441044
prints = append(prints, printTest)
10451045
}
10461046

1047-
// Order runs for coordinating start JSON prints.
1047+
// Order runs for coordinating start JSON prints via two mechanisms:
1048+
// 1. Channel locking forces runTest actions to start in-order.
1049+
// 2. Barrier tasks force runTest actions to be scheduled in-order.
1050+
// We need both for performant behavior, as channel locking without the barrier tasks starves the worker pool,
1051+
// and barrier tasks without channel locking doesn't guarantee start in-order behavior alone.
1052+
var prevBarrier *work.Action
10481053
ch := make(chan struct{})
10491054
close(ch)
10501055
for _, a := range runs {
10511056
if r, ok := a.Actor.(*runTestActor); ok {
1057+
// Inject a barrier task between the run action and its dependencies.
1058+
// This barrier task wil also depend on the previous barrier task.
1059+
// This prevents the run task from being scheduled until all previous run dependencies have finished.
1060+
// The build graph will be augmented to look roughly like this:
1061+
// build("a") build("b") build("c")
1062+
// | | |
1063+
// barrier("a.test") -> barrier("b.test") -> barrier("c.test")
1064+
// | | |
1065+
// run("a.test") run("b.test") run("c.test")
1066+
1067+
barrier := &work.Action{
1068+
Mode: "test barrier",
1069+
Deps: slices.Clip(a.Deps),
1070+
}
1071+
if prevBarrier != nil {
1072+
barrier.Deps = append(barrier.Deps, prevBarrier)
1073+
}
1074+
a.Deps = []*work.Action{barrier}
1075+
prevBarrier = barrier
1076+
10521077
r.prev = ch
10531078
ch = make(chan struct{})
10541079
r.next = ch
@@ -1400,6 +1425,8 @@ func (lockedStdout) Write(b []byte) (int, error) {
14001425

14011426
func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action) error {
14021427
sh := b.Shell(a)
1428+
barrierAction := a.Deps[0]
1429+
buildAction := barrierAction.Deps[0]
14031430

14041431
// Wait for previous test to get started and print its first json line.
14051432
select {
@@ -1530,7 +1557,7 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
15301557
// we have different link inputs but the same final binary,
15311558
// we still reuse the cached test result.
15321559
// c.saveOutput will store the result under both IDs.
1533-
r.c.tryCacheWithID(b, a, a.Deps[0].BuildContentID())
1560+
r.c.tryCacheWithID(b, a, buildAction.BuildContentID())
15341561
}
15351562
if r.c.buf != nil {
15361563
if stdout != &buf {
@@ -1581,7 +1608,7 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
15811608
// fresh copies of tools to test as part of the testing.
15821609
addToEnv = "GOCOVERDIR=" + gcd
15831610
}
1584-
args := str.StringList(execCmd, a.Deps[0].BuiltTarget(), testlogArg, panicArg, fuzzArg, coverdirArg, testArgs)
1611+
args := str.StringList(execCmd, buildAction.BuiltTarget(), testlogArg, panicArg, fuzzArg, coverdirArg, testArgs)
15851612

15861613
if testCoverProfile != "" {
15871614
// Write coverage to temporary profile, for merging later.
@@ -1741,8 +1768,8 @@ func (r *runTestActor) Act(b *work.Builder, ctx context.Context, a *work.Action)
17411768
// tryCache is called just before the link attempt,
17421769
// to see if the test result is cached and therefore the link is unneeded.
17431770
// It reports whether the result can be satisfied from cache.
1744-
func (c *runCache) tryCache(b *work.Builder, a *work.Action) bool {
1745-
return c.tryCacheWithID(b, a, a.Deps[0].BuildActionID())
1771+
func (c *runCache) tryCache(b *work.Builder, a *work.Action, linkAction *work.Action) bool {
1772+
return c.tryCacheWithID(b, a, linkAction.BuildActionID())
17461773
}
17471774

17481775
func (c *runCache) tryCacheWithID(b *work.Builder, a *work.Action, id string) bool {

src/cmd/go/internal/work/action.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ type Action struct {
9292

9393
buggyInstall bool // is this a buggy install (see -linkshared)?
9494

95-
TryCache func(*Builder, *Action) bool // callback for cache bypass
95+
TryCache func(*Builder, *Action, *Action) bool // callback for cache bypass
9696

9797
CacheExecutable bool // Whether to cache executables produced by link steps
9898

src/cmd/go/internal/work/buildid.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,25 @@ var (
401401
stdlibRecompiledIncOnce = sync.OnceFunc(stdlibRecompiled.Inc)
402402
)
403403

404+
// testRunAction returns the run action for a test given the link action
405+
// for the test binary, if the only (non-test-barrier) action that depend
406+
// on the link action is the run action.
407+
func testRunAction(a *Action) *Action {
408+
if len(a.triggers) != 1 || a.triggers[0].Mode != "test barrier" {
409+
return nil
410+
}
411+
var runAction *Action
412+
for _, t := range a.triggers[0].triggers {
413+
if t.Mode == "test run" {
414+
if runAction != nil {
415+
return nil
416+
}
417+
runAction = t
418+
}
419+
}
420+
return runAction
421+
}
422+
404423
// useCache tries to satisfy the action a, which has action ID actionHash,
405424
// by using a cached result from an earlier build.
406425
// If useCache decides that the cache can be used, it sets a.buildID
@@ -526,7 +545,7 @@ func (b *Builder) useCache(a *Action, actionHash cache.ActionID, target string,
526545
// then to avoid the link step, report the link as up-to-date.
527546
// We avoid the nested build ID problem in the previous special case
528547
// by recording the test results in the cache under the action ID half.
529-
if len(a.triggers) == 1 && a.triggers[0].TryCache != nil && a.triggers[0].TryCache(b, a.triggers[0]) {
548+
if ra := testRunAction(a); ra != nil && ra.TryCache != nil && ra.TryCache(b, ra, a) {
530549
// Best effort attempt to display output from the compile and link steps.
531550
// If it doesn't work, it doesn't work: reusing the test result is more
532551
// important than reprinting diagnostic information.

src/cmd/go/internal/work/cover.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,9 @@ func (b *Builder) CovData(a *Action, cmdargs ...any) ([]byte, error) {
3636
// but will be empty; in this case the return is an empty string.
3737
func BuildActionCoverMetaFile(runAct *Action) (string, error) {
3838
p := runAct.Package
39-
for i := range runAct.Deps {
40-
pred := runAct.Deps[i]
39+
barrierAct := runAct.Deps[0]
40+
for i := range barrierAct.Deps {
41+
pred := barrierAct.Deps[i]
4142
if pred.Mode != "build" || pred.Package == nil {
4243
continue
4344
}

src/cmd/go/internal/work/exec.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,21 @@ func (b *Builder) Do(ctx context.Context, root *Action) {
183183

184184
for _, a0 := range a.triggers {
185185
if a.Failed != nil {
186-
a0.Failed = a.Failed
186+
if a0.Mode == "test barrier" {
187+
// If this action was triggered by a test, there
188+
// will be a test barrier action in between the test
189+
// and the true trigger. But there will be other
190+
// triggers that are other barriers that are waiting
191+
// for this one. Propagate the failure to the true
192+
// trigger, but not to the other barriers.
193+
for _, bt := range a0.triggers {
194+
if bt.Mode != "test barrier" {
195+
bt.Failed = a.Failed
196+
}
197+
}
198+
} else {
199+
a0.Failed = a.Failed
200+
}
187201
}
188202
if a0.pending--; a0.pending == 0 {
189203
b.ready.push(a0)

0 commit comments

Comments
 (0)