@@ -14,7 +14,6 @@ import (
14
14
"path/filepath"
15
15
"regexp"
16
16
"strings"
17
- "sync"
18
17
"sync/atomic"
19
18
"text/tabwriter"
20
19
"time"
@@ -25,6 +24,7 @@ import (
25
24
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
26
25
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/clusterupgrade"
27
26
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil/task"
27
+ "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
28
28
"github.com/cockroachdb/cockroach/pkg/roachpb"
29
29
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
30
30
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
@@ -33,23 +33,9 @@ import (
33
33
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
34
34
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
35
35
"github.com/cockroachdb/errors"
36
- "golang.org/x/exp/maps"
37
36
)
38
37
39
38
type (
40
- // crdbMonitor is a thin wrapper around the roachtest monitor API
41
- // (cluster.NewMonitor) that produces error events through a channel
42
- // whenever an unexpected node death happens. It also allows us to
43
- // provide an API for test authors to inform the framework that a
44
- // node death is expected if the test performs its own restarts or
45
- // chaos events.
46
- crdbMonitor struct {
47
- once sync.Once
48
- crdbNodes option.NodeListOption
49
- monitor cluster.Monitor
50
- errCh chan error
51
- }
52
-
53
39
serviceRuntime struct {
54
40
descriptor * ServiceDescriptor
55
41
binaryVersions * atomic.Value
72
58
logger * logger.Logger
73
59
74
60
background task.Manager
75
- monitor * crdbMonitor
61
+ monitor test. Monitor
76
62
77
63
// ranUserHooks keeps track of whether the runner has run any
78
64
// user-provided hooks so far.
@@ -111,6 +97,7 @@ func newTestRunner(
111
97
ctx context.Context ,
112
98
cancel context.CancelFunc ,
113
99
plan * TestPlan ,
100
+ rt test.Test ,
114
101
tag string ,
115
102
l * logger.Logger ,
116
103
c cluster.Cluster ,
@@ -142,7 +129,7 @@ func newTestRunner(
142
129
tenantService : tenantService ,
143
130
cluster : c ,
144
131
background : task .NewManager (ctx , l ),
145
- monitor : newCRDBMonitor ( ctx , c , maps . Keys ( allCRDBNodes ) ),
132
+ monitor : rt . Monitor ( ),
146
133
ranUserHooks : & ranUserHooks ,
147
134
}
148
135
}
@@ -193,9 +180,6 @@ func (tr *testRunner) run() (retErr error) {
193
180
}
194
181
195
182
return fmt .Errorf ("background step `%s` returned error: %w" , event .Name , event .Err )
196
-
197
- case err := <- tr .monitor .Err ():
198
- return tr .testFailure (tr .ctx , err , tr .logger , nil )
199
183
}
200
184
}
201
185
}
@@ -388,11 +372,6 @@ func (tr *testRunner) teardown(stepsChan chan error, testFailed bool) {
388
372
tr .logger .Printf ("stopping background functions" )
389
373
tr .background .Terminate (tr .logger )
390
374
391
- tr .logger .Printf ("stopping node monitor" )
392
- if err := tr .monitor .Stop (); err != nil {
393
- tr .logger .Printf ("monitor returned error: %v" , err )
394
- }
395
-
396
375
// If the test failed, we wait for any currently running steps to
397
376
// return before passing control back to the roachtest
398
377
// framework. This achieves a test.log that does not contain any
@@ -614,22 +593,6 @@ func (tr *testRunner) refreshServiceData(ctx context.Context, service *serviceRu
614
593
return err
615
594
}
616
595
617
- // We only want to start the monitor once we know every relevant
618
- // cockroach binary is running. This is due to a limitation on the
619
- // roachprod monitor: it is only able to monitor cockroach processes
620
- // that are running at the time the monitor is created.
621
- //
622
- // For system-only and other non-separate-process deployments, we can
623
- // initialize the monitor right away, since this function is only
624
- // called once the storage cluster is running. For separate-process
625
- // deployments, we start the monitor if this function is called with
626
- // the tenant service. The system is always started first, so when
627
- // this function is called with the tenant service, we know that
628
- // every relevant cockroach binary is running at this point.
629
- if tr .plan .deploymentMode != SeparateProcessDeployment || ! isSystem {
630
- tr .monitor .Init ()
631
- }
632
-
633
596
return nil
634
597
}
635
598
@@ -749,46 +712,6 @@ func (tr *testRunner) addGrafanaAnnotation(
749
712
return tr .cluster .AddGrafanaAnnotation (ctx , l , req )
750
713
}
751
714
752
- func newCRDBMonitor (
753
- ctx context.Context , c cluster.Cluster , crdbNodes option.NodeListOption ,
754
- ) * crdbMonitor {
755
- return & crdbMonitor {
756
- crdbNodes : crdbNodes ,
757
- monitor : c .NewMonitor (ctx , crdbNodes ),
758
- errCh : make (chan error ),
759
- }
760
- }
761
-
762
- // Init must be called once the cluster is initialized and the
763
- // cockroach process is running on the nodes. Init is idempotent.
764
- func (cm * crdbMonitor ) Init () {
765
- cm .once .Do (func () {
766
- go func () {
767
- if err := cm .monitor .WaitForNodeDeath (); err != nil {
768
- cm .errCh <- err
769
- }
770
- }()
771
- })
772
- }
773
-
774
- // Err returns a channel that will receive errors whenever an
775
- // unexpected node death is observed.
776
- func (cm * crdbMonitor ) Err () chan error {
777
- return cm .errCh
778
- }
779
-
780
- func (cm * crdbMonitor ) ExpectDeaths (n int ) {
781
- cm .monitor .ExpectDeaths (int32 (n ))
782
- }
783
-
784
- func (cm * crdbMonitor ) Stop () error {
785
- if cm .monitor == nil { // test-only
786
- return nil
787
- }
788
-
789
- return cm .monitor .WaitE ()
790
- }
791
-
792
715
// tableWriter is a thin wrapper around the `tabwriter` package used
793
716
// by the test runner to display logical and released binary versions
794
717
// in a tabular format.
0 commit comments