Skip to content

Commit d598226

Browse files
committed
roachtest: adding defensive code in ceph/reef test
We have seen sporadic failures in the Ceph tests, caused by failures to create users in the Ceph Object Gateway. To address this, we are adding code that checks the gateway is up by submitting a read-only request before attempting to add the user. Epic: none Fixes: #148731 Release note: None
1 parent 7898c83 commit d598226

File tree

1 file changed

+23
-1
lines changed

1 file changed

+23
-1
lines changed

pkg/cmd/roachtest/tests/s3_microceph.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@ import (
1010
"fmt"
1111
"net/url"
1212
"path/filepath"
13+
"time"
1314

1415
"github.com/cockroachdb/cockroach/pkg/cloud/amazon"
1516
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
1617
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option"
1718
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
19+
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
20+
"github.com/cockroachdb/cockroach/pkg/util/retry"
1821
)
1922

2023
// cephDisksScript creates 3 4GB loop devices, i.e. virtual block devices that allow
@@ -124,7 +127,9 @@ func (m cephManager) install(ctx context.Context) {
124127
rgwCmd = rgwCmd + ` --ssl-certificate="$(base64 -w0 certs/node.crt)" --ssl-private-key="$(base64 -w0 certs/node.key)"`
125128
}
126129
m.run(ctx, `starting object gateway`, rgwCmd)
127-
130+
// We have seen occasional failures in creating users, so we
131+
// wait until a read-only request succeeds before proceeding.
132+
m.checkRGW(ctx)
128133
m.run(ctx, `creating backup user`,
129134
`sudo radosgw-admin user create --uid=backup --display-name=backup`)
130135
m.run(ctx, `add keys to the user`,
@@ -166,3 +171,20 @@ func (m cephManager) run(ctx context.Context, msg string, cmd ...string) {
166171
m.c.Run(ctx, option.WithNodes(m.cephNodes), cmd...)
167172
m.t.Status(msg, " done")
168173
}
174+
175+
// checkRGW verifies that the Ceph Object Gateway is up.
176+
func (m cephManager) checkRGW(ctx context.Context) {
177+
m.t.Status("waiting for Ceph Object Gateway...")
178+
if err := m.c.RunE(ctx,
179+
option.WithNodes(m.cephNodes).
180+
WithRetryOpts(retry.Options{
181+
InitialBackoff: 2 * time.Second,
182+
MaxBackoff: 30 * time.Second,
183+
MaxRetries: 10,
184+
}).
185+
WithShouldRetryFn(func(*install.RunResultDetails) bool { return true }),
186+
`sudo radosgw-admin user list`,
187+
); err != nil {
188+
m.t.Error("failed to connect to Ceph Object Gateway", err)
189+
}
190+
}

0 commit comments

Comments
 (0)