
Commit 42852c5

GODRIVER-3638 Prohibit using failpoints on sharded topologies. (#2168)
1 parent 7d13d02 commit 42852c5

8 files changed: +82 -23 lines changed

internal/integration/crud_prose_test.go

Lines changed: 7 additions & 2 deletions
@@ -499,7 +499,10 @@ func TestClientBulkWriteProse(t *testing.T) {
 		assert.Equal(mt, 1, opsCnt[1], "expected %d secondEvent.command.ops, got: %d", 1, opsCnt[1])
 	})
 
-	mt.Run("5. MongoClient.bulkWrite collects WriteConcernErrors across batches", func(mt *mtest.T) {
+	// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+	// topologies. Allow running on sharded topologies once that is fixed.
+	noShardedOpts := mtest.NewOptions().Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+	mt.RunOpts("5. MongoClient.bulkWrite collects WriteConcernErrors across batches", noShardedOpts, func(mt *mtest.T) {
 		var eventCnt int
 		monitor := &event.CommandMonitor{
 			Started: func(_ context.Context, e *event.CommandStartedEvent) {
@@ -715,7 +718,9 @@ func TestClientBulkWriteProse(t *testing.T) {
 		assert.Equal(mt, 1, getMoreCalled, "expected %d getMore call, got: %d", 1, getMoreCalled)
 	})
 
-	mt.Run("9. MongoClient.bulkWrite handles a getMore error", func(mt *mtest.T) {
+	// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+	// topologies. Allow running on sharded topologies once that is fixed.
+	mt.RunOpts("9. MongoClient.bulkWrite handles a getMore error", noShardedOpts, func(mt *mtest.T) {
 		var getMoreCalled int
 		var killCursorsCalled int
 		monitor := &event.CommandMonitor{

internal/integration/csot_prose_test.go

Lines changed: 13 additions & 3 deletions
@@ -238,7 +238,10 @@ func TestCSOTProse_GridFS(t *testing.T) {
 	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))
 
 	mt.RunOpts("6. gridfs - upload", mtest.NewOptions().MinServerVersion("4.4"), func(mt *mtest.T) {
-		mt.Run("uploads via openUploadStream can be timed out", func(mt *mtest.T) {
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		noShardedOpts := mtest.NewOptions().Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+		mt.RunOpts("uploads via openUploadStream can be timed out", noShardedOpts, func(mt *mtest.T) {
 			// Drop and re-create the db.fs.files and db.fs.chunks collections.
 			err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 			assert.NoError(mt, err, "failed to drop files")
@@ -298,7 +301,9 @@ func TestCSOTProse_GridFS(t *testing.T) {
 			assert.Error(t, err, context.DeadlineExceeded)
 		})
 
-		mt.Run("Aborting an upload stream can be timed out", func(mt *mtest.T) {
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		mt.RunOpts("Aborting an upload stream can be timed out", noShardedOpts, func(mt *mtest.T) {
 			// Drop and re-create the db.fs.files and db.fs.chunks collections.
 			err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 			assert.NoError(mt, err, "failed to drop files")
@@ -414,7 +419,12 @@ func TestCSOTProse_GridFS(t *testing.T) {
 	})
 
 	const test62 = "6.2 gridfs - upload with operation-level timeout"
-	mt.RunOpts(test62, mtest.NewOptions().MinServerVersion("4.4"), func(mt *mtest.T) {
+	mtOpts := mtest.NewOptions().
+		MinServerVersion("4.4").
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
+	mt.RunOpts(test62, mtOpts, func(mt *mtest.T) {
 		// Drop and re-create the db.fs.files and db.fs.chunks collections.
 		err := mt.Client.Database("db").Collection("fs.files").Drop(context.Background())
 		assert.NoError(mt, err, "failed to drop files")

internal/integration/mtest/mongotest.go

Lines changed: 23 additions & 4 deletions
@@ -80,10 +80,11 @@ type T struct {
 	requireAPIVersion *bool
 
 	// options copied to sub-tests
-	clientType  ClientType
-	clientOpts  *options.ClientOptions
-	collOpts    *options.CollectionOptionsBuilder
-	shareClient *bool
+	clientType               ClientType
+	clientOpts               *options.ClientOptions
+	collOpts                 *options.CollectionOptionsBuilder
+	shareClient              *bool
+	allowFailPointsOnSharded bool
 
 	baseOpts *Options // used to create subtests
 
@@ -125,6 +126,9 @@ func newT(wrapped *testing.T, opts ...*Options) *T {
 	if t.shareClient != nil {
 		t.baseOpts.ShareClient(*t.shareClient)
 	}
+	if t.allowFailPointsOnSharded {
+		t.baseOpts.AllowFailPointsOnSharded()
+	}
 
 	return t
 }
@@ -501,6 +505,21 @@ func (t *T) ClearCollections() {
 // SetFailPoint sets a fail point for the client associated with T. Commands to create the failpoint will appear
 // in command monitoring channels. The fail point will automatically be disabled after this test has run.
 func (t *T) SetFailPoint(fp failpoint.FailPoint) {
+	// Do not allow failpoints to be used on sharded topologies unless
+	// specifically configured to allow it.
+	//
+	// On sharded topologies, failpoints are applied to only a single mongoS. If
+	// the driver is connected to multiple mongoS instances, there's a
+	// possibility a different mongoS will be selected for a subsequent command.
+	// In that case, the failpoint is effectively ignored, leading to a test
+	// failure that is extremely difficult to diagnose.
+	//
+	// TODO(GODRIVER-3328): Remove this once we set failpoints on every mongoS
+	// in sharded topologies.
+	if testContext.topoKind == Sharded && !t.allowFailPointsOnSharded {
+		t.Fatalf("cannot use failpoints with sharded topologies unless AllowFailPointsOnSharded is set")
+	}
+
 	// ensure mode fields are int32
 	if modeMap, ok := fp.Mode.(map[string]any); ok {
 		var key string

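The TODO(GODRIVER-3328) comments above point at the eventual fix: apply the failpoint to every mongoS instead of a single one, so that server selection cannot route a later command to a mongoS that never saw the failpoint. A minimal sketch of that approach is shown below; it is not part of this commit, and the helper name, host list, driver v2 import paths, and the assumption that failpoint.FailPoint marshals with configureFailPoint as its first BSON field are all assumptions.

// Hypothetical helper (not part of this commit), assuming the Go driver v2
// API and the module-internal failpoint package.
package example

import (
	"context"

	"go.mongodb.org/mongo-driver/v2/internal/failpoint"
	"go.mongodb.org/mongo-driver/v2/mongo"
	"go.mongodb.org/mongo-driver/v2/mongo/options"
)

// setFailPointOnEachMongos configures the same failpoint on every mongoS by
// connecting directly to each host, so a command routed to any mongoS still
// observes the failpoint.
func setFailPointOnEachMongos(ctx context.Context, hosts []string, fp failpoint.FailPoint) error {
	for _, host := range hosts {
		// A direct connection pins all commands to this single host.
		opts := options.Client().SetHosts([]string{host}).SetDirect(true)

		client, err := mongo.Connect(opts)
		if err != nil {
			return err
		}

		// configureFailPoint must run against the admin database.
		runErr := client.Database("admin").RunCommand(ctx, fp).Err()
		_ = client.Disconnect(ctx)
		if runErr != nil {
			return runErr
		}
	}
	return nil
}

Connecting with SetDirect(true) bypasses server selection entirely, so each mongoS receives its own copy of the failpoint rather than whichever one the topology happens to select.
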
internal/integration/mtest/options.go

Lines changed: 15 additions & 0 deletions
@@ -281,3 +281,18 @@ func (op *Options) RequireAPIVersion(rav bool) *Options {
 	})
 	return op
 }
+
+// AllowFailPointsOnSharded bypasses the check for failpoints used on sharded
+// topologies.
+//
+// Failpoints are generally unreliable on sharded topologies, but can be used if
+// the failpoint is explicitly applied to every mongoS node in the cluster.
+//
+// TODO(GODRIVER-3328): Remove this option once we set failpoints on every
+// mongoS in sharded topologies.
+func (op *Options) AllowFailPointsOnSharded() *Options {
+	op.optFuncs = append(op.optFuncs, func(t *T) {
+		t.allowFailPointsOnSharded = true
+	})
+	return op
+}

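Sharded-only tests that genuinely need a failpoint, such as the "retrying in sharded cluster" cases updated below, chain the new option onto their test options. A hedged usage sketch follows, written as a fragment inside a test function where mt is an *mtest.T; the failpoint.FailPoint field names (ConfigureFailPoint, Mode, Data.FailCommands, Data.ErrorCode) are assumed from their use elsewhere in this test suite.

// Hedged sketch (assumed API, not code from this commit): opting in so that
// SetFailPoint does not call t.Fatalf on a sharded deployment.
shardedOpts := mtest.NewOptions().
	Topologies(mtest.Sharded).
	MinServerVersion("4.2").
	AllowFailPointsOnSharded()

mt.RunOpts("sharded failpoint example", shardedOpts, func(mt *mtest.T) {
	mt.SetFailPoint(failpoint.FailPoint{
		ConfigureFailPoint: "failCommand",
		// SetFailPoint normalizes int mode values to int32, so a plain map works.
		Mode: map[string]any{"times": 1},
		Data: failpoint.Data{
			FailCommands: []string{"find"},
			ErrorCode:    91, // ShutdownInProgress
		},
	})

	// ... run the operation under test against mt.Coll and assert on the
	// retry/error behavior ...
})

Opting in shifts the burden to the test itself: it must either tolerate a command landing on a mongoS that never received the failpoint or constrain the client to a single host.
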
internal/integration/retryable_reads_prose_test.go

Lines changed: 12 additions & 9 deletions
@@ -34,14 +34,17 @@ func TestRetryableReadsProse(t *testing.T) {
 		SetPoolMonitor(tpm.PoolMonitor).SetHeartbeatInterval(500 * time.Millisecond).
 		SetHosts(hosts[:1])
 
-	mtOpts := mtest.NewOptions().ClientOptions(clientOpts).MinServerVersion("4.3")
-	mt := mtest.New(t, mtOpts)
-
-	mt.Run("PoolClearedError retryability", func(mt *mtest.T) {
-		if mtest.ClusterTopologyKind() == mtest.LoadBalanced {
-			mt.Skip("skipping as load balanced topology has different pool clearing behavior")
-		}
-
+	mt := mtest.New(t, mtest.NewOptions().ClientOptions(clientOpts))
+
+	mtOpts := mtest.NewOptions().
+		MinServerVersion("4.3").
+		// Load-balanced topologies have a different behavior for clearing the
+		// pool, so don't run the test on load-balanced topologies
+		//
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.Single, mtest.ReplicaSet)
+	mt.RunOpts("PoolClearedError retryability", mtOpts, func(mt *mtest.T) {
 		// Insert a document to test collection.
 		_, err := mt.Coll.InsertOne(context.Background(), bson.D{{"x", 1}})
 		assert.Nil(mt, err, "InsertOne error: %v", err)
@@ -106,7 +109,7 @@ func TestRetryableReadsProse(t *testing.T) {
 		}
 	})
 
-	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2")
+	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2").AllowFailPointsOnSharded()
 	mt.RunOpts("retrying in sharded cluster", mtOpts, func(mt *mtest.T) {
 		tests := []struct {
 			name string

internal/integration/retryable_writes_prose_test.go

Lines changed: 7 additions & 3 deletions
@@ -155,8 +155,12 @@ func TestRetryableWritesProse(t *testing.T) {
 		SetPoolMonitor(tpm.PoolMonitor).SetHeartbeatInterval(500 * time.Millisecond).
 		SetHosts(hosts[:1])
 
-	mtPceOpts := mtest.NewOptions().ClientOptions(pceOpts).MinServerVersion("4.3").
-		Topologies(mtest.ReplicaSet, mtest.Sharded)
+	mtPceOpts := mtest.NewOptions().
+		ClientOptions(pceOpts).
+		MinServerVersion("4.3").
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// topologies. Allow running on sharded topologies once that is fixed.
+		Topologies(mtest.ReplicaSet)
 	mt.RunOpts("PoolClearedError retryability", mtPceOpts, func(mt *mtest.T) {
 		// Force Find to block for 1 second once.
 		mt.SetFailPoint(failpoint.FailPoint{
@@ -287,7 +291,7 @@ func TestRetryableWritesProse(t *testing.T) {
 		require.True(mt, err.(mongo.WriteException).HasErrorCode(int(shutdownInProgressErrorCode)))
 	})
 
-	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2")
+	mtOpts = mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.2").AllowFailPointsOnSharded()
 	mt.RunOpts("retrying in sharded cluster", mtOpts, func(mt *mtest.T) {
 		tests := []struct {
 			name string

internal/integration/sdam_prose_test.go

Lines changed: 4 additions & 1 deletion
@@ -98,7 +98,10 @@ func TestSDAMProse(t *testing.T) {
 		SetAppName("streamingRttTest")
 	mtOpts := mtest.NewOptions().
 		MinServerVersion("4.4").
-		ClientOptions(clientOpts)
+		ClientOptions(clientOpts).
+		// TODO(GODRIVER-3328): FailPoints are not currently reliable on sharded
+		// clusters. Remove this exclusion once we fix that.
+		Topologies(mtest.Single, mtest.ReplicaSet, mtest.LoadBalanced)
 	mt.RunOpts("rtt is continuously updated", mtOpts, func(mt *mtest.T) {
 		// Test that the RTT monitor updates the RTT for server descriptions.
 
internal/integration/server_selection_prose_test.go

Lines changed: 1 addition & 1 deletion
@@ -112,7 +112,7 @@ func TestServerSelectionProse(t *testing.T) {
 
 	mt := mtest.New(t, mtest.NewOptions().CreateClient(false))
 
-	mtOpts := mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.9")
+	mtOpts := mtest.NewOptions().Topologies(mtest.Sharded).MinServerVersion("4.9").AllowFailPointsOnSharded()
 	mt.RunOpts("operationCount-based selection within latency window, with failpoint", mtOpts, func(mt *mtest.T) {
 		_, err := mt.Coll.InsertOne(context.Background(), bson.D{})
 		require.NoError(mt, err, "InsertOne() error")
