
Commit 5ea1f14

Julien-Ben authored and fealebenpae committed
CLOUDP-290619 Refactor and unit test logic to mark hosts as non-voting when scaling down (#4146)
# Summary

This is a follow-up from one of the MC Sharded feature branches. https://jira.mongodb.org/browse/CLOUDP-290619

This PR refactors `prepareScaleDownShardedCluster` to make it easier to unit test:

- extract the logic that computes the members to scale down out of `prepareScaleDownShardedCluster`
- unit test the new function `computeMembersToScaleDown`
- fix old tests that relied on the resource status, now migrated to a config map

## Proof of Work

Unit tests
1 parent 29a41c6 commit 5ea1f14
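Before the diff, here is a hedged sketch (not code from the repository) of the data shape the extracted function produces. The resource name `my-sc` is hypothetical; the pod-name pattern `<resource>[-config|-<shardIdx>]-<clusterIdx>-<podIdx>` is inferred from the expectations in the new unit test.

```go
package main

import "fmt"

func main() {
	// Example output of computeMembersToScaleDown: replica set name -> members to remove.
	// Config servers of "my-sc" scale from 5 to 2 on cluster index 0; shard 0 scales from 3 to 1.
	membersToScaleDown := map[string][]string{
		"my-sc-config": {"my-sc-config-0-2", "my-sc-config-0-3", "my-sc-config-0-4"},
		"my-sc-0":      {"my-sc-0-0-1", "my-sc-0-0-2"},
	}
	// prepareScaleDownShardedCluster hands this map to replicaset.PrepareScaleDownFromMap,
	// which sets votes and priority to 0 for exactly these members before they are removed.
	fmt.Println(membersToScaleDown)
}
```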

File tree

3 files changed: +219 -52 lines changed


controllers/operator/mongodbshardedcluster_controller.go

Lines changed: 28 additions & 8 deletions
@@ -162,8 +162,23 @@ func (r *ShardedClusterReconcileHelper) initializeMemberClusters(globalMemberClu
 		r.shardsMemberClustersMap, r.allShardsMemberClusters = r.createShardsMemberClusterLists(shardsMap, globalMemberClustersMap, log, r.deploymentState, false)
 	} else {
 		r.shardsMemberClustersMap, r.allShardsMemberClusters = r.createShardsMemberClusterLists(shardsMap, globalMemberClustersMap, log, r.deploymentState, true)
-		r.configSrvMemberClusters = []multicluster.MemberCluster{multicluster.GetLegacyCentralMemberCluster(r.deploymentState.Status.MongodbShardedClusterSizeConfig.ConfigServerCount, 0, r.commonController.client, r.commonController.SecretClient)}
-		r.mongosMemberClusters = []multicluster.MemberCluster{multicluster.GetLegacyCentralMemberCluster(r.deploymentState.Status.MongodbShardedClusterSizeConfig.MongosCount, 0, r.commonController.client, r.commonController.SecretClient)}
+
+		// SizeStatusInClusters is the primary struct for storing state, designed with multi-cluster support in mind.
+		// For a single-cluster setup, we first attempt to read from the fields in SizeStatusInClusters,
+		// falling back to the legacy structure (MongodbShardedClusterSizeConfig) if they are unavailable. The fallback
+		// is to be defensive but with the migration performed at the beginning of the reconcile (if necessary), there
+		// should be no case of having only the legacy fields populated in the state.
+		configSrvCount, configSrvCountExists := r.deploymentState.Status.SizeStatusInClusters.ConfigServerMongodsInClusters[multicluster.LegacyCentralClusterName]
+		if !configSrvCountExists {
+			configSrvCount = r.deploymentState.Status.MongodbShardedClusterSizeConfig.ConfigServerCount
+		}
+		r.configSrvMemberClusters = []multicluster.MemberCluster{multicluster.GetLegacyCentralMemberCluster(configSrvCount, 0, r.commonController.client, r.commonController.SecretClient)}
+
+		mongosCount, mongosCountExists := r.deploymentState.Status.SizeStatusInClusters.MongosCountInClusters[multicluster.LegacyCentralClusterName]
+		if !mongosCountExists {
+			mongosCount = r.deploymentState.Status.MongodbShardedClusterSizeConfig.MongosCount
+		}
+		r.mongosMemberClusters = []multicluster.MemberCluster{multicluster.GetLegacyCentralMemberCluster(mongosCount, 0, r.commonController.client, r.commonController.SecretClient)}
 	}

 	r.allMemberClusters = r.createAllMemberClustersList()
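The fallback read above boils down to a comma-ok map lookup with a legacy default. A minimal, self-contained sketch, assuming a hypothetical helper name (`readMemberCount`) and a placeholder key standing in for `multicluster.LegacyCentralClusterName`:

```go
package main

import "fmt"

const legacyCentralClusterName = "__default" // stand-in for multicluster.LegacyCentralClusterName

// readMemberCount prefers the per-cluster map (SizeStatusInClusters-style) and falls back
// to the legacy single-cluster field (MongodbShardedClusterSizeConfig-style) if the key is absent.
func readMemberCount(countsInClusters map[string]int, legacyCount int) int {
	if count, ok := countsInClusters[legacyCentralClusterName]; ok {
		return count
	}
	return legacyCount
}

func main() {
	fmt.Println(readMemberCount(map[string]int{legacyCentralClusterName: 3}, 5)) // 3
	fmt.Println(readMemberCount(map[string]int{}, 5))                            // 5
}
```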
@@ -1665,12 +1680,9 @@ func (r *ShardedClusterReconcileHelper) getMongosHostnames(memberCluster multicl
 	}
 }

-// prepareScaleDownShardedCluster collects all replicasets members to scale down, from configservers and shards, accross
-// all clusters, and pass them to PrepareScaleDownFromMap, which sets their votes and priorities to 0
-func (r *ShardedClusterReconcileHelper) prepareScaleDownShardedCluster(omClient om.Connection, log *zap.SugaredLogger) error {
+func (r *ShardedClusterReconcileHelper) computeMembersToScaleDown(configSrvMemberClusters []multicluster.MemberCluster, shardsMemberClustersMap map[int][]multicluster.MemberCluster, log *zap.SugaredLogger) map[string][]string {
 	membersToScaleDown := make(map[string][]string)
-
-	for _, memberCluster := range r.configSrvMemberClusters {
+	for _, memberCluster := range configSrvMemberClusters {
 		currentReplicas := memberCluster.Replicas
 		desiredReplicas := scale.ReplicasThisReconciliation(r.GetConfigSrvScaler(memberCluster))
 		_, currentPodNames := r.getConfigSrvHostnames(memberCluster, currentReplicas)
@@ -1686,7 +1698,7 @@ func (r *ShardedClusterReconcileHelper) prepareScaleDownShardedCluster(omClient
 	}

 	// Scaledown size of each shard
-	for shardIdx, memberClusters := range r.shardsMemberClustersMap {
+	for shardIdx, memberClusters := range shardsMemberClustersMap {
 		for _, memberCluster := range memberClusters {
 			currentReplicas := memberCluster.Replicas
 			desiredReplicas := scale.ReplicasThisReconciliation(r.GetShardScaler(shardIdx, memberCluster))
@@ -1703,6 +1715,14 @@ func (r *ShardedClusterReconcileHelper) prepareScaleDownShardedCluster(omClient
 		}
 	}

+	return membersToScaleDown
+}
+
+// prepareScaleDownShardedCluster collects all replicasets members to scale down, from configservers and shards, across
+// all clusters, and pass them to PrepareScaleDownFromMap, which sets their votes and priorities to 0
+func (r *ShardedClusterReconcileHelper) prepareScaleDownShardedCluster(omClient om.Connection, log *zap.SugaredLogger) error {
+	membersToScaleDown := r.computeMembersToScaleDown(r.configSrvMemberClusters, r.shardsMemberClustersMap, log)
+
 	if len(membersToScaleDown) > 0 {
 		healthyProcessesToWaitForReadyState := r.getHealthyProcessNamesToWaitForReadyState(omClient, log)
 		if err := replicaset.PrepareScaleDownFromMap(omClient, membersToScaleDown, healthyProcessesToWaitForReadyState, log); err != nil {
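As context for the comment on `prepareScaleDownShardedCluster`: a rough sketch of what marking members as non-voting amounts to. This is not the implementation of `replicaset.PrepareScaleDownFromMap`; the `Member` type and `zeroVotesAndPriority` helper are illustrative stand-ins.

```go
package main

import "fmt"

type Member struct {
	Host     string
	Votes    int
	Priority float64
}

// zeroVotesAndPriority clears votes and priority for every member scheduled for removal,
// so the replica set can shrink without the departing members affecting elections.
func zeroVotesAndPriority(members []Member, toRemove map[string]bool) {
	for i := range members {
		if toRemove[members[i].Host] {
			members[i].Votes = 0
			members[i].Priority = 0
		}
	}
}

func main() {
	rs := []Member{
		{Host: "my-sc-config-0-1", Votes: 1, Priority: 1},
		{Host: "my-sc-config-0-2", Votes: 1, Priority: 1},
	}
	zeroVotesAndPriority(rs, map[string]bool{"my-sc-config-0-2": true})
	fmt.Printf("%+v\n", rs)
}
```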

controllers/operator/mongodbshardedcluster_controller_multi_test.go

Lines changed: 133 additions & 1 deletion
@@ -52,9 +52,22 @@ func newShardedClusterReconcilerForMultiCluster(ctx context.Context, sc *mdbv1.M
 // createMockStateConfigMap creates a configMap with the sizeStatusInClusters populated based on the cluster state
 // passed in parameters, to simulate it is the current state of the cluster
 func createMockStateConfigMap(kubeClient client.Client, namespace, scName string, state MultiClusterShardedScalingStep) error {
+	sumMap := func(m map[string]int) int {
+		sum := 0
+		for _, val := range m {
+			sum += val
+		}
+		return sum
+	}
+
+	configServerSum := sumMap(state.configServerDistribution)
+	mongosSum := sumMap(state.mongosDistribution)
+
 	sizeStatus := map[string]interface{}{
 		"status": map[string]interface{}{
-			"shardCount": state.shardCount,
+			"shardCount":        state.shardCount,
+			"configServerCount": configServerSum,
+			"mongosCount":       mongosSum,
 			"sizeStatusInClusters": map[string]interface{}{
 				"shardMongodsInClusters": state.shardDistribution,
 				"mongosCountInClusters":  state.mongosDistribution,
@@ -2477,6 +2490,125 @@ func checkCorrectShardDistributionInStatus(t *testing.T, sc *mdbv1.MongoDB) {
 	assert.Equal(t, sumSlice(util.Transform(sc.Spec.ShardSpec.ClusterSpecList, clusterSpecItemToMembers)), sc.Status.MongodbShardedClusterSizeConfig.MongodsPerShardCount)
 }

+func TestComputeMembersToScaleDown(t *testing.T) {
+	ctx := context.Background()
+	memberCluster1 := "cluster1"
+	memberCluster2 := "cluster2"
+	memberClusterNames := []string{memberCluster1, memberCluster2}
+
+	type testCase struct {
+		name                        string
+		shardCount                  int
+		cfgServerCurrentClusters    []multicluster.MemberCluster
+		shardsCurrentClusters       map[int][]multicluster.MemberCluster
+		targetCfgServerDistribution map[string]int
+		targetShardDistribution     map[string]int
+		expected                    map[string][]string
+	}
+
+	testCases := []testCase{
+		{
+			name:       "Case 1: Downscale config server and shard",
+			shardCount: 1,
+			cfgServerCurrentClusters: []multicluster.MemberCluster{
+				{Name: memberCluster1, Index: 0, Replicas: 5},
+				{Name: memberCluster2, Index: 1, Replicas: 0},
+			},
+			shardsCurrentClusters: map[int][]multicluster.MemberCluster{
+				0: {
+					{Name: memberCluster1, Index: 0, Replicas: 3},
+					{Name: memberCluster2, Index: 1, Replicas: 2},
+				},
+			},
+			targetCfgServerDistribution: map[string]int{
+				memberCluster1: 2,
+				memberCluster2: 1,
+			},
+			targetShardDistribution: map[string]int{
+				memberCluster1: 1,
+				memberCluster2: 2,
+			},
+			expected: map[string][]string{
+				// For the config replica set: downscale from 5 to 2 means remove members with indices 2, 3, 4
+				test.SCBuilderDefaultName + "-config": {
+					test.SCBuilderDefaultName + "-config-0-2",
+					test.SCBuilderDefaultName + "-config-0-3",
+					test.SCBuilderDefaultName + "-config-0-4",
+				},
+				// For the shard replica set (shard 0): downscale from 3 to 1, so remove two members
+				test.SCBuilderDefaultName + "-0": {
+					test.SCBuilderDefaultName + "-0-0-1",
+					test.SCBuilderDefaultName + "-0-0-2",
+				},
+			},
+		},
+		{
+			name:       "Case 2: Scale down and move replicas among clusters",
+			shardCount: 2,
+			cfgServerCurrentClusters: []multicluster.MemberCluster{
+				{Name: memberCluster1, Index: 0, Replicas: 2},
+				{Name: memberCluster2, Index: 1, Replicas: 1},
+			},
+			shardsCurrentClusters: map[int][]multicluster.MemberCluster{
+				0: {
+					{Name: memberCluster1, Index: 0, Replicas: 3},
+					{Name: memberCluster2, Index: 1, Replicas: 2},
+				},
+				1: {
+					{Name: memberCluster1, Index: 0, Replicas: 3},
+					{Name: memberCluster2, Index: 1, Replicas: 2},
+				},
+			},
+			targetCfgServerDistribution: map[string]int{
+				memberCluster1: 1,
+				memberCluster2: 2,
+			},
+			targetShardDistribution: map[string]int{
+				memberCluster1: 3,
+				memberCluster2: 0,
+			},
+			expected: map[string][]string{
+				test.SCBuilderDefaultName + "-config": {
+					test.SCBuilderDefaultName + "-config" + "-0" + "-1",
+				},
+				// For each shard replica set, we remove two members from cluster with index 1
+				test.SCBuilderDefaultName + "-0": {
+					test.SCBuilderDefaultName + "-0-1-0",
+					test.SCBuilderDefaultName + "-0-1-1",
+				},
+				test.SCBuilderDefaultName + "-1": {
+					test.SCBuilderDefaultName + "-1-1-0",
+					test.SCBuilderDefaultName + "-1-1-1",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			targetSpec := test.DefaultClusterBuilder().
+				SetTopology(mdbv1.ClusterTopologyMultiCluster).
+				SetShardCountSpec(tc.shardCount).
+				SetMongodsPerShardCountSpec(0).
+				SetConfigServerCountSpec(0).
+				SetMongosCountSpec(0).
+				SetConfigSrvClusterSpec(test.CreateClusterSpecList(memberClusterNames, tc.targetCfgServerDistribution)).
+				SetShardClusterSpec(test.CreateClusterSpecList(memberClusterNames, tc.targetShardDistribution)).
+				Build()
+
+			_, omConnectionFactory := mock.NewDefaultFakeClient(targetSpec)
+			memberClusterMap := getFakeMultiClusterMapWithClusters(memberClusterNames, omConnectionFactory)
+
+			_, reconcileHelper, _, _, err := defaultClusterReconciler(ctx, targetSpec, memberClusterMap)
+			assert.NoError(t, err)
+
+			membersToScaleDown := reconcileHelper.computeMembersToScaleDown(tc.cfgServerCurrentClusters, tc.shardsCurrentClusters, zap.S())
+
+			assert.Equal(t, tc.expected, membersToScaleDown)
+		})
+	}
+}
+
 func sumSlice[T constraints.Integer](s []T) int {
 	result := 0
 	for i := range s {
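The expected names in `TestComputeMembersToScaleDown` follow a simple rule: for each member cluster, pods with index in `[desiredReplicas, currentReplicas)` are scaled down. A minimal sketch of that arithmetic, using a hypothetical helper and resource name:

```go
package main

import "fmt"

// podsToRemove lists the pod names dropped when a replica set member cluster scales
// from currentReplicas down to desiredReplicas; names follow <rsName>-<clusterIdx>-<podIdx>.
func podsToRemove(rsName string, clusterIdx, currentReplicas, desiredReplicas int) []string {
	var names []string
	for podIdx := desiredReplicas; podIdx < currentReplicas; podIdx++ {
		names = append(names, fmt.Sprintf("%s-%d-%d", rsName, clusterIdx, podIdx))
	}
	return names
}

func main() {
	// Case 1 above: config servers on cluster index 0 scale from 5 to 2.
	fmt.Println(podsToRemove("my-sc-config", 0, 5, 2)) // [my-sc-config-0-2 my-sc-config-0-3 my-sc-config-0-4]
	// Case 2 above: shard 0 on cluster index 1 scales from 2 to 0.
	fmt.Println(podsToRemove("my-sc-0", 1, 2, 0)) // [my-sc-0-1-0 my-sc-0-1-1]
}
```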

controllers/operator/mongodbshardedcluster_controller_test.go

Lines changed: 58 additions & 43 deletions
@@ -450,26 +450,34 @@ func getEmptyDeploymentOptions() deploymentOptions {
 // TestPrepareScaleDownShardedCluster tests the scale down operation for config servers and mongods per shard. It checks
 // that all members that will be removed are marked as unvoted
 func TestPrepareScaleDownShardedCluster_ConfigMongodsUp(t *testing.T) {
-	t.Skip("This test is too fragile to be executed; it's based on status and not deployment state and test internal interactions that are no longer true. Either we rewrite it to full Reconcile or remove it.")
 	ctx := context.Background()
+
+	initialState := MultiClusterShardedScalingStep{
+		shardCount: 2,
+		configServerDistribution: map[string]int{
+			multicluster.LegacyCentralClusterName: 3,
+		},
+		shardDistribution: map[string]int{
+			multicluster.LegacyCentralClusterName: 4,
+		},
+	}
+
 	scBeforeScale := test.DefaultClusterBuilder().
-		SetConfigServerCountStatus(3).
 		SetConfigServerCountSpec(3).
-		SetMongodsPerShardCountStatus(4).
 		SetMongodsPerShardCountSpec(4).
 		Build()

-	omConnectionFactory := om.NewCachedOMConnectionFactoryWithInitializedConnection(om.NewMockedOmConnection(createDeploymentFromShardedCluster(t, scBeforeScale)))
-	_, reconcileHelper, _, _, _ := defaultClusterReconciler(ctx, scBeforeScale, nil)
-
-	// TODO prepareScaleDownShardedCluster is getting data from deployment state so modify it instead of passing state in MongoDB object
 	scAfterScale := test.DefaultClusterBuilder().
-		SetConfigServerCountStatus(3).
 		SetConfigServerCountSpec(2).
-		SetMongodsPerShardCountStatus(4).
 		SetMongodsPerShardCountSpec(3).
 		Build()

+	omConnectionFactory := om.NewCachedOMConnectionFactoryWithInitializedConnection(om.NewMockedOmConnection(createDeploymentFromShardedCluster(t, scBeforeScale)))
+	kubeClient, _ := mock.NewDefaultFakeClient(scAfterScale)
+	// Store the initial scaling status in state configmap
+	assert.NoError(t, createMockStateConfigMap(kubeClient, mock.TestNamespace, scBeforeScale.Name, initialState))
+	_, reconcileHelper, err := newShardedClusterReconcilerFromResource(ctx, scAfterScale, nil, kubeClient, omConnectionFactory)
+	assert.NoError(t, err)
 	assert.NoError(t, reconcileHelper.prepareScaleDownShardedCluster(omConnectionFactory.GetConnection(), zap.S()))

 	// create the expected deployment from the sharded cluster that has not yet scaled
@@ -493,16 +501,31 @@ func TestPrepareScaleDownShardedCluster_ConfigMongodsUp(t *testing.T) {
 // TestPrepareScaleDownShardedCluster_ShardsUpMongodsDown checks the situation when shards count increases and mongods
 // count per shard is decreased - scale down operation is expected to be called only for existing shards
 func TestPrepareScaleDownShardedCluster_ShardsUpMongodsDown(t *testing.T) {
-	t.Skip("This test is too fragile to be executed; it's based on status and not deployment state and test internal interactions that are no longer true. Either we rewrite it to full Reconcile or remove it.")
 	ctx := context.Background()
+
+	initialState := MultiClusterShardedScalingStep{
+		shardCount: 4,
+		shardDistribution: map[string]int{
+			multicluster.LegacyCentralClusterName: 4,
+		},
+	}
+
 	scBeforeScale := test.DefaultClusterBuilder().
-		SetShardCountStatus(4).
 		SetShardCountSpec(4).
-		SetMongodsPerShardCountStatus(4).
 		SetMongodsPerShardCountSpec(4).
 		Build()

-	_, reconcileHelper, _, omConnectionFactory, _ := defaultClusterReconciler(ctx, scBeforeScale, nil)
+	scAfterScale := test.DefaultClusterBuilder().
+		SetShardCountSpec(2).
+		SetMongodsPerShardCountSpec(3).
+		Build()
+
+	omConnectionFactory := om.NewCachedOMConnectionFactoryWithInitializedConnection(om.NewMockedOmConnection(createDeploymentFromShardedCluster(t, scBeforeScale)))
+	kubeClient, _ := mock.NewDefaultFakeClient(scAfterScale)
+	assert.NoError(t, createMockStateConfigMap(kubeClient, mock.TestNamespace, scBeforeScale.Name, initialState))
+	_, reconcileHelper, err := newShardedClusterReconcilerFromResource(ctx, scAfterScale, nil, kubeClient, omConnectionFactory)
+	assert.NoError(t, err)
+
 	omConnectionFactory.SetPostCreateHook(func(connection om.Connection) {
 		deployment := createDeploymentFromShardedCluster(t, scBeforeScale)
 		if _, err := connection.UpdateDeployment(deployment); err != nil {
@@ -512,17 +535,6 @@ func TestPrepareScaleDownShardedCluster_ShardsUpMongodsDown(t *testing.T) {
 		connection.(*om.MockedOmConnection).CleanHistory()
 	})

-	// TODO prepareScaleDownShardedCluster is getting data from deployment state so modify it instead of passing state in MongoDB object
-	scAfterScale := test.DefaultClusterBuilder().
-		SetShardCountStatus(4).
-		SetShardCountSpec(2).
-		SetMongodsPerShardCountStatus(4).
-		SetMongodsPerShardCountSpec(3).
-		Build()
-
-	// necessary otherwise next omConnectionFactory.GetConnection() will return nil as the connectionFactoryFunc hasn't been called yet
-	initializeOMConnection(t, ctx, reconcileHelper, scAfterScale, zap.S(), omConnectionFactory)
-
 	assert.NoError(t, reconcileHelper.prepareScaleDownShardedCluster(omConnectionFactory.GetConnection(), zap.S()))

 	// expected change of state: rs members are marked unvoted only for two shards (old state)
@@ -589,17 +601,34 @@ func initializeOMConnection(t *testing.T, ctx context.Context, reconcileHelper *
 // TestUpdateOmDeploymentShardedCluster_HostsRemovedFromMonitoring verifies that if scale down operation was performed -
 // hosts are removed
 func TestUpdateOmDeploymentShardedCluster_HostsRemovedFromMonitoring(t *testing.T) {
-	t.Skip("This test is too fragile to be executed; it's based on status and not deployment state and test internal interactions that are no longer true. Either we rewrite it to full Reconcile or remove it.")
 	ctx := context.Background()
-	// TODO use deployment state instead of status
+
+	initialState := MultiClusterShardedScalingStep{
+		mongosDistribution: map[string]int{
+			multicluster.LegacyCentralClusterName: 2,
+		},
+		configServerDistribution: map[string]int{
+			multicluster.LegacyCentralClusterName: 4,
+		},
+	}
+
 	sc := test.DefaultClusterBuilder().
-		SetMongosCountStatus(2).
 		SetMongosCountSpec(2).
-		SetConfigServerCountStatus(4).
 		SetConfigServerCountSpec(4).
 		Build()

-	_, reconcileHelper, _, omConnectionFactory, _ := defaultClusterReconciler(ctx, sc, nil)
+	// we need to create a different sharded cluster that is currently in the process of scaling down
+	scScaledDown := test.DefaultClusterBuilder().
+		SetMongosCountSpec(1).
+		SetConfigServerCountSpec(3).
+		Build()
+
+	omConnectionFactory := om.NewCachedOMConnectionFactoryWithInitializedConnection(om.NewMockedOmConnection(createDeploymentFromShardedCluster(t, sc)))
+	kubeClient, _ := mock.NewDefaultFakeClient(sc)
+	assert.NoError(t, createMockStateConfigMap(kubeClient, mock.TestNamespace, sc.Name, initialState))
+	_, reconcileHelper, err := newShardedClusterReconcilerFromResource(ctx, scScaledDown, nil, kubeClient, omConnectionFactory)
+	assert.NoError(t, err)
+
 	omConnectionFactory.SetPostCreateHook(func(connection om.Connection) {
 		// the initial deployment we create should have all processes
 		deployment := createDeploymentFromShardedCluster(t, sc)
@@ -609,20 +638,6 @@ func TestUpdateOmDeploymentShardedCluster_HostsRemovedFromMonitoring(t *testing.
 		connection.(*om.MockedOmConnection).AddHosts(deployment.GetAllHostnames())
 		connection.(*om.MockedOmConnection).CleanHistory()
 	})
-	// necessary otherwise next omConnectionFactory.GetConnection() will return nil as the connectionFactoryFunc hasn't been called yet
-	initializeOMConnection(t, ctx, reconcileHelper, sc, zap.S(), omConnectionFactory)
-
-	// we need to create a different sharded cluster that is currently in the process of scaling down
-	// TODO use deployment state instead of status
-	scScaledDown := test.DefaultClusterBuilder().
-		SetMongosCountStatus(2).
-		SetMongosCountSpec(1).
-		SetConfigServerCountStatus(4).
-		SetConfigServerCountSpec(3).
-		Build()
-
-	// necessary otherwise next omConnectionFactory.GetConnection() will return nil as the connectionFactoryFunc hasn't been called yet
-	initializeOMConnection(t, ctx, reconcileHelper, scScaledDown, zap.S(), omConnectionFactory)

 	// updateOmDeploymentShardedCluster checks an element from ac.Auth.DeploymentAuthMechanisms
 	// so we need to ensure it has a non-nil value. An empty list implies no authentication
