Skip to content

Commit ecfe6f2

Browse files
committed
failureinjection: catch runAsync panics as errors
Now that the failureinjection framework can spin up goroutines, make sure to catch all panics as errors. This keeps it compatible with the roachtest framework, where we don't want any uncaught panics to take down an entire run.
1 parent 8ab6731 commit ecfe6f2

File tree

7 files changed

+50
-27
lines changed

7 files changed

+50
-27
lines changed

pkg/roachprod/failureinjection/failures/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ go_library(
1818
"//pkg/roachprod",
1919
"//pkg/roachprod/install",
2020
"//pkg/roachprod/logger",
21+
"//pkg/roachprod/roachprodutil",
2122
"//pkg/util/retry",
2223
"//pkg/util/syncutil",
2324
"//pkg/util/sysutil",

pkg/roachprod/failureinjection/failures/failure.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/cockroachdb/cockroach/pkg/roachprod"
1616
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
1717
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
18+
"github.com/cockroachdb/cockroach/pkg/roachprod/roachprodutil"
1819
"github.com/cockroachdb/cockroach/pkg/util/retry"
1920
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
2021
"github.com/cockroachdb/errors"
@@ -284,10 +285,12 @@ func forEachNode(nodes install.Nodes, fn func(install.Nodes) error) error {
284285
return nil
285286
}
286287

287-
func runAsync(f func() error) <-chan error {
288+
func runAsync(ctx context.Context, l *logger.Logger, f func(context.Context) error) <-chan error {
288289
errCh := make(chan error, 1)
289290
go func() {
290-
err := f()
291+
err := roachprodutil.PanicAsError(ctx, l, func(context.Context) error {
292+
return f(ctx)
293+
})
291294
errCh <- err
292295
close(errCh)
293296
}()

pkg/roachprod/failureinjection/failures/process_kill.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func (f *ProcessKillFailure) Inject(ctx context.Context, l *logger.Logger, args
8787
// WaitForFailureToPropagate. We run Stop in a goroutine to achieve this, although it
8888
// does mean we will ignore all errors unless the user also calls WaitForFailureToPropagate.
8989
// We make this tradeoff in order to avoid maintaining two different Stop implementations.
90-
f.waitCh = runAsync(func() error {
90+
f.waitCh = runAsync(ctx, l, func(ctx context.Context) error {
9191
return f.c.WithNodes(nodes).Stop(ctx, l, int(signal), true, gracePeriod, label)
9292
})
9393
return nil

pkg/roachprod/install/BUILD.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,14 @@ go_library(
3333
"//pkg/roachprod/config",
3434
"//pkg/roachprod/errors",
3535
"//pkg/roachprod/logger",
36+
"//pkg/roachprod/roachprodutil",
3637
"//pkg/roachprod/ssh",
3738
"//pkg/roachprod/ui",
3839
"//pkg/roachprod/vm",
3940
"//pkg/roachprod/vm/aws",
4041
"//pkg/roachprod/vm/gce",
4142
"//pkg/roachprod/vm/local",
4243
"//pkg/testutils",
43-
"//pkg/util/debugutil",
4444
"//pkg/util/intsets",
4545
"//pkg/util/log",
4646
"//pkg/util/retry",

pkg/roachprod/install/cockroach.go

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ import (
2525
"github.com/cockroachdb/cockroach/pkg/ccl/utilccl/licenseccl"
2626
"github.com/cockroachdb/cockroach/pkg/roachprod/config"
2727
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
28+
"github.com/cockroachdb/cockroach/pkg/roachprod/roachprodutil"
2829
"github.com/cockroachdb/cockroach/pkg/roachprod/ssh"
2930
"github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce"
3031
"github.com/cockroachdb/cockroach/pkg/testutils"
31-
"github.com/cockroachdb/cockroach/pkg/util/debugutil"
3232
"github.com/cockroachdb/cockroach/pkg/util/retry"
3333
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
3434
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
@@ -443,7 +443,7 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
443443
for _, hook := range startOpts.PreStartHooks {
444444
hookCtx, cancel := context.WithTimeout(ctx, hook.Timeout)
445445
l.Printf("running pre-start hook: %s", hook.Name)
446-
err := panicAsError(hookCtx, l, hook.Fn)
446+
err := roachprodutil.PanicAsError(hookCtx, l, hook.Fn)
447447
cancel()
448448
if err != nil {
449449
return err
@@ -1703,23 +1703,3 @@ type PreStartHook struct {
17031703
Fn func(context.Context) error
17041704
Timeout time.Duration
17051705
}
1706-
1707-
// logPanicToErr logs the panic stack trace and returns an error with the
1708-
// panic message.
1709-
func panicAsError(
1710-
ctx context.Context, l *logger.Logger, f func(context.Context) error,
1711-
) (retErr error) {
1712-
defer func() {
1713-
if r := recover(); r != nil {
1714-
retErr = logPanicToErr(l, r)
1715-
}
1716-
}()
1717-
return f(ctx)
1718-
}
1719-
1720-
// logPanicToErr logs the panic stack trace and returns an error with the
1721-
// panic message.
1722-
func logPanicToErr(l *logger.Logger, r interface{}) error {
1723-
l.Printf("panic stack trace:\n%s", debugutil.Stack())
1724-
return fmt.Errorf("panic (stack trace above): %v", r)
1725-
}

pkg/roachprod/roachprodutil/BUILD.bazel

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
22

33
go_library(
44
name = "roachprodutil",
5-
srcs = ["identity.go"],
5+
srcs = [
6+
"identity.go",
7+
"utils.go",
8+
],
69
importpath = "github.com/cockroachdb/cockroach/pkg/roachprod/roachprodutil",
710
visibility = ["//visibility:public"],
811
deps = [
12+
"//pkg/roachprod/logger",
13+
"//pkg/util/debugutil",
914
"@com_github_cockroachdb_errors//:errors",
1015
"@com_google_cloud_go_storage//:storage",
1116
"@org_golang_google_api//idtoken",
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2025 The Cockroach Authors.
2+
//
3+
// Use of this software is governed by the CockroachDB Software License
4+
// included in the /LICENSE file.
5+
6+
package roachprodutil
7+
8+
import (
9+
"context"
10+
"fmt"
11+
12+
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
13+
"github.com/cockroachdb/cockroach/pkg/util/debugutil"
14+
)
15+
16+
// PanicAsError logs the panic stack trace and returns an error with the
17+
// panic message.
18+
func PanicAsError(
19+
ctx context.Context, l *logger.Logger, f func(context.Context) error,
20+
) (retErr error) {
21+
defer func() {
22+
if r := recover(); r != nil {
23+
retErr = logPanicToErr(l, r)
24+
}
25+
}()
26+
return f(ctx)
27+
}
28+
29+
// logPanicToErr logs the panic stack trace and returns an error with the
30+
// panic message.
31+
func logPanicToErr(l *logger.Logger, r interface{}) error {
32+
l.Printf("panic stack trace:\n%s", debugutil.Stack())
33+
return fmt.Errorf("panic (stack trace above): %v", r)
34+
}

0 commit comments

Comments
 (0)