Skip to content

Commit 9149e0e

Browse files
craig[bot]jayshrivastavayuzefovich
committed
107271: roachtest/cdc: add clear error message when golang cannot be installed r=jayshrivastava a=jayshrivastava ### roachtest/cdc: add clear error message when golang cannot be installed Previously, the test would fail immediately if installing golang, which is currently required to run the mock webhook sink, failed. This change adds retries for up to 5 mins. If installation still fails after 5 minutes, the error is wrapped with a clearer message. Informs: cockroachdb#107088 Epic: None Release note: None --- ### roachtest: add retries to Cluster.Install This change adds retries to `Cluster.Install` to mitigate test failures due to package installation failures. The webhook test in `cdc.go` is updated to use `Cluster.Install` to install `go`. The install command for `go` is just `sudo apt --yes install golang-go;` now. No previously existing uses of `Cluster.Install` used this command, so changing it should be safe. Informs: cockroachdb#103316 Informs: cockroachdb#107088 Release note: None Epic: None 107366: randgen: skip virtual columns in generate_test_objects r=yuzefovich a=yuzefovich If we don't, then `validateTableIndexes` will fail. Found when running `local` logic tests against the test tenant. Epic: None Release note: None Co-authored-by: Jayant Shrivastava <[email protected]> Co-authored-by: Yahor Yuzefovich <[email protected]>
3 parents 832e5f2 + f0a3de3 + e4a4212 commit 9149e0e

File tree

6 files changed

+47
-17
lines changed

6 files changed

+47
-17
lines changed

pkg/cmd/roachtest/tests/cdc.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ type cdcTester struct {
9191
doneCh chan struct{}
9292
}
9393

94+
// The node on which the webhook sink will be installed and run.
95+
func (ct *cdcTester) webhookSinkNode() option.NodeListOption {
96+
return ct.cluster.Node(ct.cluster.Spec().NodeCount)
97+
}
98+
99+
// The node on which the kafka sink will be installed and run.
100+
func (ct *cdcTester) kafkaSinkNode() option.NodeListOption {
101+
return ct.cluster.Node(ct.cluster.Spec().NodeCount)
102+
}
103+
94104
// startStatsCollection sets the start point of the stats collection window
95105
// and returns a function which should be called at the end of the test to dump a
96106
// stats.json file to the artifacts directory.
@@ -158,7 +168,7 @@ func (ct *cdcTester) setupSink(args feedArgs) string {
158168
sinkURI = `experimental-gs://cockroach-tmp/roachtest/` + ts + "?AUTH=implicit"
159169
case webhookSink:
160170
ct.t.Status("webhook install")
161-
webhookNode := ct.cluster.Node(ct.cluster.Spec().NodeCount)
171+
webhookNode := ct.webhookSinkNode()
162172
rootFolder := `/home/ubuntu`
163173
nodeIPs, _ := ct.cluster.ExternalIP(ct.ctx, ct.logger, webhookNode)
164174

@@ -184,8 +194,6 @@ func (ct *cdcTester) setupSink(args feedArgs) string {
184194
ct.t.Fatal(err)
185195
}
186196

187-
ct.cluster.Run(ct.ctx, webhookNode, `sudo apt --yes install golang-go;`)
188-
189197
// Start the server in its own monitor to not block ct.mon.Wait()
190198
serverExecCmd := fmt.Sprintf(`go run webhook-server-%d.go`, webhookPort)
191199
m := ct.cluster.NewMonitor(ct.ctx, ct.workloadNode)
@@ -206,7 +214,7 @@ func (ct *cdcTester) setupSink(args feedArgs) string {
206214
case pubsubSink:
207215
sinkURI = changefeedccl.GcpScheme + `://cockroach-ephemeral` + "?AUTH=implicit&topic_name=pubsubSink-roachtest&region=us-east1"
208216
case kafkaSink:
209-
kafkaNode := ct.cluster.Node(ct.cluster.Spec().NodeCount)
217+
kafkaNode := ct.kafkaSinkNode()
210218
kafka := kafkaManager{
211219
t: ct.t,
212220
c: ct.cluster,
@@ -1290,6 +1298,13 @@ func registerCDC(r registry.Registry) {
12901298
ct := newCDCTester(ctx, t, c)
12911299
defer ct.Close()
12921300

1301+
// Consider an installation failure to be a flake which is out of
1302+
// our control. This should be rare.
1303+
err := c.Install(ctx, t.L(), ct.webhookSinkNode(), "go")
1304+
if err != nil {
1305+
t.Skip(err)
1306+
}
1307+
12931308
ct.runTPCCWorkload(tpccArgs{warehouses: 100, duration: "30m"})
12941309

12951310
// The deprecated webhook sink is unable to handle the throughput required for 100 warehouses
@@ -1338,7 +1353,7 @@ func registerCDC(r registry.Registry) {
13381353

13391354
ct.runTPCCWorkload(tpccArgs{warehouses: 1})
13401355

1341-
kafkaNode := ct.cluster.Node(ct.cluster.Spec().NodeCount)
1356+
kafkaNode := ct.kafkaSinkNode()
13421357
kafka := kafkaManager{
13431358
t: ct.t,
13441359
c: ct.cluster,

pkg/roachprod/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ go_library(
2525
"//pkg/server/debug/replay",
2626
"//pkg/util/ctxgroup",
2727
"//pkg/util/httputil",
28+
"//pkg/util/retry",
2829
"//pkg/util/syncutil",
2930
"//pkg/util/timeutil",
3031
"@com_github_cockroachdb_errors//:errors",

pkg/roachprod/install/install.go

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,7 @@ sudo apt-get update;
102102
sudo apt-get install -y gcc;
103103
`,
104104

105-
// graphviz and rlwrap are useful for pprof
106-
"go": `
107-
sudo apt-get update;
108-
sudo apt-get install -y graphviz rlwrap;
109-
110-
curl https://dl.google.com/go/go1.12.linux-amd64.tar.gz | sudo tar -C /usr/local -xz;
111-
echo 'export PATH=$PATH:/usr/local/go/bin' | sudo tee /etc/profile.d/go.sh > /dev/null;
112-
sudo chmod +x /etc/profile.d/go.sh;
113-
`,
105+
"go": `sudo apt --yes install golang-go;`,
114106

115107
"haproxy": `
116108
sudo apt-get update;

pkg/roachprod/roachprod.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import (
4545
"github.com/cockroachdb/cockroach/pkg/server/debug/replay"
4646
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
4747
"github.com/cockroachdb/cockroach/pkg/util/httputil"
48+
"github.com/cockroachdb/cockroach/pkg/util/retry"
4849
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
4950
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
5051
"github.com/cockroachdb/errors"
@@ -833,7 +834,21 @@ func Install(ctx context.Context, l *logger.Logger, clusterName string, software
833834
if err != nil {
834835
return err
835836
}
836-
return install.Install(ctx, l, c, software)
837+
838+
// As seen in #103316, this can hit a 503 Service Unavailable when
839+
// trying to download the package, so we retry every 30 seconds
840+
// for up to 5 mins below. The caller may choose to fail or skip the test.
841+
return retry.WithMaxAttempts(ctx, retry.Options{
842+
InitialBackoff: 30 * time.Second,
843+
Multiplier: 1,
844+
}, 10, func() error {
845+
err := install.Install(ctx, l, c, software)
846+
err = errors.Wrapf(err, "retryable infrastructure error: could not install %s", software)
847+
if err != nil {
848+
l.Printf(err.Error())
849+
}
850+
return err
851+
})
837852
}
838853

839854
// Download downloads 3rd party tools, using a GCS cache if possible.

pkg/sql/catalog/randgen/templates.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,8 @@ outer:
138138
}
139139

140140
for _, origColDef := range origDesc.Columns {
141-
// We don't take over hidden/inaccessible columns.
142-
if origColDef.Hidden || origColDef.Inaccessible {
141+
// We don't take over hidden/inaccessible/virtual columns.
142+
if origColDef.Hidden || origColDef.Inaccessible || origColDef.Virtual {
143143
continue
144144
}
145145
colID := t.desc.NextColumnID

pkg/sql/logictest/testdata/logic_test/gen_test_objects

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,12 @@ SELECT crdb_internal.generate_test_objects('{"seed":1234,"counts":[10],"table_te
169169
----
170170
{"databases": 0, "schemas": 0, "tables": 10}
171171

172+
# Regression test: virtual columns must be skipped when generating test objects from a table template.
173+
query T
174+
SELECT crdb_internal.generate_test_objects('{"seed":1234,"counts":[1],"table_templates":["system.statement_statistics"]}'::JSONB)->'generated_counts'
175+
----
176+
{"databases": 0, "schemas": 0, "tables": 1}
177+
172178
query T
173179
SELECT table_name FROM [SHOW TABLES]
174180
ORDER BY table_name
@@ -182,6 +188,7 @@ priVileges
182188
replication_critical_localities
183189
role_id_seq
184190
statement_bundle_chunks
191+
sta😣tement_statistics
185192
ten ant_set tings
186193

187194
# Again, the column names are randomized.

0 commit comments

Comments
 (0)