Skip to content

Commit 1757367

Browse files
committed
testcluster: assign unique ClusterName
This commit makes TestCluster by default assign unique cluster name in the TestServerArgs. The cluster name is shared by all participating or added nodes, unless overridden. Epic: none Release note: none
1 parent a59328e commit 1757367

File tree

8 files changed

+72
-37
lines changed

8 files changed

+72
-37
lines changed

pkg/base/test_server_args.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,14 @@ type TestServerArgs struct {
122122
UseDatabase string
123123

124124
// If set, this will be configured in the test server to check connections
125-
// from other test servers and to report in the SQL introspection.
125+
// from other test servers and to report in the SQL introspection. It is
126+
// advised to make the name sufficiently unique, in order to prevent a
127+
// TestCluster from accidentally getting messages from unrelated clusters in
128+
// the same environment that used the same TCP ports recently (e.g. see
129+
// https://github.com/cockroachdb/cockroach/issues/157838).
130+
//
131+
// If empty (most cases), a unique ClusterName is generated automatically, or
132+
// a higher-level default is used (e.g. taken from TestClusterArgs).
126133
ClusterName string
127134

128135
// Stopper can be used to stop the server. If not set, a stopper will be

pkg/cli/debug_recover_loss_of_quorum_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ func TestCollectInfoFromOnlineCluster(t *testing.T) {
137137
"recover",
138138
"collect-info",
139139
"--insecure",
140-
"--host",
141-
tc.Server(2).AdvRPCAddr(),
140+
"--host", tc.Server(2).AdvRPCAddr(),
141+
"--cluster-name", tc.ClusterName(),
142142
replicaInfoFileName,
143143
})
144144

@@ -554,6 +554,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
554554
"--confirm=y",
555555
"--certs-dir=test_certs",
556556
"--host=" + tc.Server(0).AdvRPCAddr(),
557+
"--cluster-name=" + tc.ClusterName(),
557558
"--plan=" + planFile,
558559
})
559560
require.NoError(t, err, "failed to run make-plan")
@@ -577,6 +578,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
577578
"debug", "recover", "apply-plan",
578579
"--certs-dir=test_certs",
579580
"--host=" + tc.Server(0).AdvRPCAddr(),
581+
"--cluster-name=" + tc.ClusterName(),
580582
"--confirm=y", planFile,
581583
})
582584
require.NoError(t, err, "failed to run apply plan")
@@ -592,6 +594,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
592594
"debug", "recover", "verify",
593595
"--certs-dir=test_certs",
594596
"--host=" + tc.Server(0).AdvRPCAddr(),
597+
"--cluster-name=" + tc.ClusterName(),
595598
planFile,
596599
})
597600
require.NoError(t, err, "failed to run verify plan")
@@ -641,6 +644,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
641644
"debug", "recover", "verify",
642645
"--certs-dir=test_certs",
643646
"--host=" + tc.Server(0).AdvRPCAddr(),
647+
"--cluster-name=" + tc.ClusterName(),
644648
planFile,
645649
})
646650
require.NoError(t, err, "failed to run verify plan")

pkg/cli/testdata/zip/partial1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
zip
22
----
3-
debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false /dev/null
3+
debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
44
[cluster] discovering virtual clusters... done
55
[cluster] creating output file /dev/null... done
66
[cluster] establishing RPC connection to ...

pkg/cli/testdata/zip/partial1_excluded

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
zip
22
----
3-
debug zip /dev/null --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false
3+
debug zip --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
44
[cluster] discovering virtual clusters... done
55
[cluster] creating output file /dev/null... done
66
[cluster] establishing RPC connection to ...

pkg/cli/testdata/zip/partial2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
zip
22
----
3-
debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false /dev/null
3+
debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=<cluster-name> /dev/null
44
[cluster] discovering virtual clusters... done
55
[cluster] creating output file /dev/null... done
66
[cluster] establishing RPC connection to ...

pkg/cli/testdata/zip/testzip_concurrent

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,4 +227,4 @@ zip
227227
[node ?] ? log files found
228228
[node ?] ? log files found
229229
[node ?] ? log files found
230-
debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false /dev/null
230+
debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=<cluster-name> /dev/null

pkg/cli/zip_test.go

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -421,10 +421,11 @@ func TestConcurrentZip(t *testing.T) {
421421
defer func(prevStderr *os.File) { stderr = prevStderr }(stderr)
422422
stderr = os.Stdout
423423

424-
out, err := c.RunWithCapture("debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false " + os.DevNull)
425-
if err != nil {
426-
t.Fatal(err)
427-
}
424+
out, err := c.RunWithCapture(fmt.Sprintf(
425+
"debug zip --timeout=30s --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=%s %s",
426+
tc.ClusterName(), os.DevNull,
427+
))
428+
require.NoError(t, err)
428429

429430
// Strip any non-deterministic messages.
430431
out = eraseNonDeterministicZipOutput(out)
@@ -437,6 +438,8 @@ func TestConcurrentZip(t *testing.T) {
437438
// which the original messages interleve with other messages mean the number
438439
// of them after each series is collapsed is also non-derministic.
439440
out = regexp.MustCompile(`<dumping SQL tables>\n`).ReplaceAllString(out, "")
441+
// Replace the non-deterministic cluster name with a placeholder.
442+
out = eraseClusterName(out, tc.ClusterName())
440443

441444
// We use datadriven simply to read the golden output file; we don't actually
442445
// run any commands. Using datadriven allows TESTFLAGS=-rewrite.
@@ -541,9 +544,8 @@ func TestUnavailableZip(t *testing.T) {
541544
tc := testcluster.StartTestCluster(t, 3,
542545
base.TestClusterArgs{ServerArgs: base.TestServerArgs{
543546
DefaultTestTenant: base.TestIsSpecificToStorageLayerAndNeedsASystemTenant,
544-
545-
Insecure: true,
546-
Knobs: base.TestingKnobs{Store: knobs},
547+
Insecure: true,
548+
Knobs: base.TestingKnobs{Store: knobs},
547549
}})
548550
defer tc.Stopper().Stop(context.Background())
549551

@@ -559,9 +561,10 @@ func TestUnavailableZip(t *testing.T) {
559561
defer close(ch)
560562

561563
// Run debug zip against node 1.
562-
debugZipCommand :=
563-
"debug zip --concurrency=1 --cpu-profile-duration=0 " + os.
564-
DevNull + " --timeout=.5s"
564+
debugZipCommand := fmt.Sprintf(
565+
"debug zip --concurrency=1 --cpu-profile-duration=0 --timeout=.5s --cluster-name=%s %s",
566+
tc.ClusterName(), os.DevNull,
567+
)
565568

566569
t.Run("server 1", func(t *testing.T) {
567570
c := TestCLI{
@@ -651,6 +654,10 @@ func baseZipOutput(nodeId int) []string {
651654
return output
652655
}
653656

657+
func eraseClusterName(str, name string) string {
658+
return strings.ReplaceAll(str, name, "<cluster-name>")
659+
}
660+
654661
func eraseNonDeterministicZipOutput(out string) string {
655662
re := regexp.MustCompile(`(?m)postgresql://.*$`)
656663
out = re.ReplaceAllString(out, `postgresql://...`)
@@ -736,13 +743,15 @@ func TestPartialZip(t *testing.T) {
736743
defer func(prevStderr *os.File) { stderr = prevStderr }(stderr)
737744
stderr = os.Stdout
738745

739-
out, err := c.RunWithCapture("debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false " + os.DevNull)
740-
if err != nil {
741-
t.Fatal(err)
742-
}
746+
out, err := c.RunWithCapture(fmt.Sprintf(
747+
"debug zip --concurrency=1 --cpu-profile-duration=0s --validate-zip-file=false --cluster-name=%s %s",
748+
tc.ClusterName(), os.DevNull,
749+
))
750+
require.NoError(t, err)
743751

744752
// Strip any non-deterministic messages.
745753
t.Log(out)
754+
out = eraseClusterName(out, tc.ClusterName())
746755
out = eraseNonDeterministicZipOutput(out)
747756

748757
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial1"),
@@ -751,12 +760,13 @@ func TestPartialZip(t *testing.T) {
751760
})
752761

753762
// Now do it again and exclude the down node explicitly.
754-
out, err = c.RunWithCapture("debug zip " + os.DevNull + " --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0" +
755-
" --validate-zip-file=false")
756-
if err != nil {
757-
t.Fatal(err)
758-
}
763+
out, err = c.RunWithCapture(fmt.Sprintf(
764+
"debug zip --concurrency=1 --exclude-nodes=2 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=%s %s",
765+
tc.ClusterName(), os.DevNull,
766+
))
767+
require.NoError(t, err)
759768

769+
out = eraseClusterName(out, tc.ClusterName())
760770
out = eraseNonDeterministicZipOutput(out)
761771
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial1_excluded"),
762772
func(t *testing.T, td *datadriven.TestData) string {
@@ -767,12 +777,11 @@ func TestPartialZip(t *testing.T) {
767777
// skips over it automatically. We specifically use --wait=none because
768778
// we're decommissioning a node in a 3-node cluster, so there's no node to
769779
// up-replicate the under-replicated ranges to.
770-
{
771-
_, err := c.RunWithCapture(fmt.Sprintf("node decommission --checks=skip --wait=none %d", 2))
772-
if err != nil {
773-
t.Fatal(err)
774-
}
775-
}
780+
_, err = c.RunWithCapture(fmt.Sprintf(
781+
"node decommission --checks=skip --wait=none --cluster-name=%s %d",
782+
tc.ClusterName(), 2,
783+
))
784+
require.NoError(t, err)
776785

777786
// We use .Override() here instead of SET CLUSTER SETTING in SQL to
778787
// override the 1m15s minimum placed on the cluster setting. There
@@ -787,12 +796,13 @@ func TestPartialZip(t *testing.T) {
787796
datadriven.RunTest(t, datapathutils.TestDataPath(t, "zip", "partial2"),
788797
func(t *testing.T, td *datadriven.TestData) string {
789798
f := func() string {
790-
out, err := c.RunWithCapture("debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false " + os.DevNull)
791-
if err != nil {
792-
t.Fatal(err)
793-
}
794-
799+
out, err := c.RunWithCapture(fmt.Sprintf(
800+
"debug zip --concurrency=1 --cpu-profile-duration=0 --validate-zip-file=false --cluster-name=%s %s",
801+
tc.ClusterName(), os.DevNull,
802+
))
803+
require.NoError(t, err)
795804
// Strip any non-deterministic messages.
805+
out = eraseClusterName(out, tc.ClusterName())
796806
return eraseNonDeterministicZipOutput(out)
797807
}
798808

pkg/testutils/testcluster/testcluster.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"context"
1010
gosql "database/sql"
1111
"fmt"
12+
"math/rand/v2"
1213
"net"
1314
"os"
1415
"reflect"
@@ -86,6 +87,11 @@ type TestCluster struct {
8687

8788
var _ serverutils.TestClusterInterface = &TestCluster{}
8889

90+
// ClusterName returns the configured or auto-generated cluster name.
91+
func (tc *TestCluster) ClusterName() string {
92+
return tc.clusterArgs.ServerArgs.ClusterName
93+
}
94+
8995
// NumServers is part of TestClusterInterface.
9096
func (tc *TestCluster) NumServers() int {
9197
return len(tc.Servers)
@@ -317,6 +323,11 @@ func NewTestCluster(
317323
if clusterArgs.StartSingleNode && nodes > 1 {
318324
t.Fatal("StartSingleNode implies 1 node only, but asked to create", nodes)
319325
}
326+
if clusterArgs.ServerArgs.ClusterName == "" {
327+
// Use a cluster name that is sufficiently unique (within the CI env) but is
328+
// concise and recognizable.
329+
clusterArgs.ServerArgs.ClusterName = fmt.Sprintf("TestCluster-%d", rand.Uint32())
330+
}
320331

321332
if err := checkServerArgsForCluster(
322333
clusterArgs.ServerArgs, clusterArgs.ReplicationMode, disallowJoinAddr,
@@ -634,6 +645,9 @@ func (tc *TestCluster) AddServer(
634645
if serverArgs.JoinAddr != "" {
635646
serverArgs.NoAutoInitializeCluster = true
636647
}
648+
if serverArgs.ClusterName == "" {
649+
serverArgs.ClusterName = tc.clusterArgs.ServerArgs.ClusterName
650+
}
637651

638652
// Check args even though we have called checkServerArgsForCluster()
639653
// already in NewTestCluster(). AddServer might be called for servers

0 commit comments

Comments
 (0)