Skip to content

Commit 5da2adb

Browse files
authored
Update test case to set correct resource quota and work with multi-cluster setup (#2264)
* Update test case to set correct resource quota and work with multi-cluster setup
1 parent 9e30cef commit 5da2adb

22 files changed

+562
-196
lines changed

api/v1beta2/foundationdbcluster_types.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,9 +443,26 @@ func (processGroupID ProcessGroupID) GetIDNumber() (int, error) {
443443
return idNum, nil
444444
}
445445

446+
// GetProcessClass returns the process class from the process group ID.
447+
func (processGroupID ProcessGroupID) GetProcessClass() ProcessClass {
448+
tmp := strings.Split(string(processGroupID), "-")
449+
if len(tmp) < 2 {
450+
return ""
451+
}
452+
453+
// The process class will always be the second last item, the last item is the ID number,
454+
// e.g. stateless-1 or with a prefix: prefix-stateless-2.
455+
return ProcessClass(tmp[len(tmp)-2])
456+
}
457+
458+
// GetExclusionString returns the exclusion string
459+
func (processGroupID ProcessGroupID) GetExclusionString() string {
460+
return fmt.Sprintf("%s:%s", FDBLocalityExclusionPrefix, processGroupID)
461+
}
462+
446463
// GetExclusionString returns the exclusion string
447464
func (processGroupStatus *ProcessGroupStatus) GetExclusionString() string {
448-
return fmt.Sprintf("%s:%s", FDBLocalityExclusionPrefix, processGroupStatus.ProcessGroupID)
465+
return processGroupStatus.ProcessGroupID.GetExclusionString()
449466
}
450467

451468
// IsExcluded returns if a process group is excluded
@@ -499,9 +516,8 @@ func (processGroupStatus *ProcessGroupStatus) GetPodName(cluster *FoundationDBCl
499516
// in the processGroupStatus without doing any parsing, so we have to use the Process Group ID, which might contain
500517
// a prefix, so we take the part after the prefix, which will be ${process-class}-${id}.
501518
sanitizedProcessGroup := strings.ReplaceAll(string(processGroupStatus.ProcessGroupID), "_", "-")
502-
sanitizedProcessClass := strings.ReplaceAll(string(processGroupStatus.ProcessClass), "_", "-")
503519

504-
idx := strings.Index(sanitizedProcessGroup, sanitizedProcessClass)
520+
idx := strings.Index(sanitizedProcessGroup, processGroupStatus.ProcessClass.GetProcessClassForPodName())
505521
sb.WriteString(sanitizedProcessGroup[idx:])
506522

507523
return sb.String()

api/v1beta2/foundationdbcluster_types_test.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5520,7 +5520,7 @@ var _ = Describe("[api] FoundationDBCluster", func() {
55205520

55215521
Context("with a custom prefix", func() {
55225522
It("parses the prefix", func() {
5523-
id, err := ProcessGroupID("dc-1storage-12").GetIDNumber()
5523+
id, err := ProcessGroupID("dc1-storage-12").GetIDNumber()
55245524
Expect(err).NotTo(HaveOccurred())
55255525
Expect(id).To(Equal(12))
55265526
})
@@ -5551,6 +5551,23 @@ var _ = Describe("[api] FoundationDBCluster", func() {
55515551
})
55525552
})
55535553

5554+
DescribeTable("getting the process class from the process group ID", func(input ProcessGroupID, expected ProcessClass) {
5555+
Expect(input.GetProcessClass()).To(Equal(expected))
5556+
},
5557+
Entry("storage ID",
5558+
ProcessGroupID("storage-12"),
5559+
ProcessClassStorage,
5560+
),
5561+
Entry("cluster controller ID",
5562+
ProcessGroupID("cluster_controller-12"),
5563+
ProcessClassClusterController,
5564+
),
5565+
Entry("with a custom prefix",
5566+
ProcessGroupID("dc1-storage-12"),
5567+
ProcessClassStorage,
5568+
),
5569+
)
5570+
55545571
DescribeTable("when checking if all addresses are excluded for a process group", func(processGroupStatus *ProcessGroupStatus, remainingMap map[string]bool, expected bool, expectedErr error) {
55555572
excluded, err := processGroupStatus.AllAddressesExcluded(log, remainingMap)
55565573
Expect(excluded).To(Equal(expected))

controllers/bounce_processes.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ func (c bounceProcesses) reconcile(_ context.Context, r *FoundationDBClusterReco
209209
return &requeue{curError: err, delayedRequeue: true}
210210
}
211211

212-
addresses = coordination.GetAddressesFromStatus(logger, status, readyForRestart, false)
212+
addresses = coordination.GetAddressesFromStatus(logger, status, readyForRestart)
213213
logger.Info("Addresses from status", "addresses", addresses)
214214
}
215215

controllers/exclude_processes.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
112112
// Make sure it's safe to exclude processes.
113113
err = fdbstatus.CanSafelyExcludeProcessesWithRecoveryState(cluster, status, r.MinimumRecoveryTimeForExclusion)
114114
if err != nil {
115-
return &requeue{curError: err, delayedRequeue: true}
115+
return &requeue{curError: err, delayedRequeue: true, delay: 10 * time.Second}
116116
}
117117

118118
var fdbProcessesToExclude []fdbv1beta2.ProcessAddress
@@ -232,7 +232,11 @@ func (e excludeProcesses) reconcile(ctx context.Context, r *FoundationDBClusterR
232232
}
233233

234234
// Convert all the process groups that should be excluded to the right addresses based on the cluster status.
235-
fdbProcessesToExclude = coordination.GetAddressesFromStatus(logger, status, allowedExclusions, cluster.UseLocalitiesForExclusion())
235+
useLocalities := cluster.UseLocalitiesForExclusion()
236+
fdbProcessesToExclude, err = coordination.GetAddressesFromCoordinationState(logger, adminClient, allowedExclusions, useLocalities, !useLocalities)
237+
if err != nil {
238+
return &requeue{curError: err, delayedRequeue: true}
239+
}
236240
}
237241

238242
r.Recorder.Event(cluster, corev1.EventTypeNormal, "ExcludingProcesses", fmt.Sprintf("Excluding %v", fdbProcessesToExclude))

controllers/exclude_processes_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,6 +1331,7 @@ var _ = Describe("exclude_processes", func() {
13311331

13321332
When("a process from another cluster is pending to be excluded", func() {
13331333
otherProcessGroupID := fdbv1beta2.ProcessGroupID("another-cluster-log-1")
1334+
var targetedProcessGroupID fdbv1beta2.ProcessGroupID
13341335

13351336
BeforeEach(func() {
13361337
adminClient, err := mock.NewMockAdminClientUncast(cluster, k8sClient)
@@ -1357,6 +1358,7 @@ var _ = Describe("exclude_processes", func() {
13571358
}
13581359

13591360
cluster.Status.ProcessGroups[idx].MarkForRemoval()
1361+
targetedProcessGroupID = processGroup.ProcessGroupID
13601362
break
13611363
}
13621364
})
@@ -1402,6 +1404,17 @@ var _ = Describe("exclude_processes", func() {
14021404
When("localities for exclusion are disabled", func() {
14031405
BeforeEach(func() {
14041406
cluster.Spec.AutomationOptions.UseLocalitiesForExclusion = pointer.Bool(false)
1407+
adminClient, err := mock.NewMockAdminClientUncast(cluster, k8sClient)
1408+
Expect(err).NotTo(HaveOccurred())
1409+
1410+
Expect(adminClient.UpdateProcessAddresses(map[fdbv1beta2.ProcessGroupID][]string{
1411+
otherProcessGroupID: {
1412+
"192.168.0.2",
1413+
},
1414+
targetedProcessGroupID: {
1415+
"192.168.0.3",
1416+
},
1417+
})).To(Succeed())
14051418
})
14061419

14071420
It("should exclude the processes", func() {

controllers/remove_process_groups.go

Lines changed: 60 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,9 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust
7373
// If no process groups are marked to remove we have to check if all process groups are excluded.
7474
if len(processGroupsToRemove) == 0 {
7575
if !allExcluded {
76-
return &requeue{message: "Reconciliation needs to exclude more processes"}
76+
return &requeue{message: "Reconciliation needs to exclude more processes", delay: 15 * time.Second}
7777
}
78+
7879
return nil
7980
}
8081

@@ -132,7 +133,7 @@ func (u removeProcessGroups) reconcile(ctx context.Context, r *FoundationDBClust
132133
logger.Info("Removing process groups", "zone", zone, "count", len(zoneRemovals), "deletionMode", cluster.GetRemovalMode())
133134
// This will return a map of the newly removed ProcessGroups and the ProcessGroups with the ResourcesTerminating condition
134135
removedProcessGroups := r.removeProcessGroups(ctx, logger, cluster, zoneRemovals, zonedRemovals[removals.TerminatingZone])
135-
err = includeProcessGroup(ctx, logger, r, cluster, removedProcessGroups, status, adminClient)
136+
err = includeProcessGroup(ctx, logger, r, status, cluster, removedProcessGroups, adminClient)
136137
if err != nil {
137138
// If the inclusion is blocked or another issues happened we will retry in 60 seconds.
138139
return &requeue{curError: err, delayedRequeue: true, delay: 60 * time.Second}
@@ -221,24 +222,20 @@ func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClus
221222
canBeIncluded = false
222223
}
223224

224-
// TODO(johscheuer): https://github.com/FoundationDB/fdb-kubernetes-operator/v2/issues/1638
225-
pvcs := &corev1.PersistentVolumeClaimList{}
226-
err = r.List(ctx, pvcs, internal.GetSinglePodListOptions(cluster, processGroup.ProcessGroupID)...)
227-
if err != nil {
225+
pvc := &corev1.PersistentVolumeClaim{}
226+
err = r.Get(ctx, client.ObjectKey{Name: processGroup.GetPvcName(cluster), Namespace: cluster.Namespace}, pvc)
227+
if err != nil && !k8serrors.IsNotFound(err) {
228228
return false, err
229229
}
230230

231-
if len(pvcs.Items) == 1 {
232-
pvc := pvcs.Items[0]
233-
if pvc.DeletionTimestamp == nil {
234-
logger.Info("Waiting for volume claim to get torn down", "processGroupID", processGroup.ProcessGroupID, "pvc", pvc.Name)
235-
return false, internal.ResourceNotDeleted{Resource: &pvc}
231+
if err == nil {
232+
if pvc.DeletionTimestamp.IsZero() {
233+
logger.Info("Waiting for process group to get torn down", "processGroupID", processGroup.ProcessGroupID, "pod", podName)
234+
return false, internal.ResourceNotDeleted{Resource: pod}
236235
}
237236

238237
// PVC is in terminating state so we don't want to block, but we also don't want to include it
239238
canBeIncluded = false
240-
} else if len(pvcs.Items) > 1 {
241-
return false, fmt.Errorf("multiple PVCs found for cluster %s, processGroupID %s", cluster.Name, processGroup.ProcessGroupID)
242239
}
243240

244241
service := &corev1.Service{}
@@ -262,7 +259,13 @@ func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClus
262259
return canBeIncluded, nil
263260
}
264261

265-
func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus, adminClient fdbadminclient.AdminClient) error {
262+
func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, status *fdbv1beta2.FoundationDBStatus, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, adminClient fdbadminclient.AdminClient) error {
263+
// Fetch the latest status to ensure the excluded server list is the latest one.
264+
currentExclusions, err := adminClient.GetExclusions()
265+
if err != nil {
266+
return err
267+
}
268+
266269
// Update here for ready inclusion --> Check here
267270
var readyForInclusion map[fdbv1beta2.ProcessGroupID]time.Time
268271
readyForInclusionUpdates := map[fdbv1beta2.ProcessGroupID]fdbv1beta2.UpdateAction{}
@@ -274,11 +277,8 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
274277
}
275278
}
276279

277-
fdbProcessesToInclude, newProcessGroups, err := getProcessesToInclude(logger, cluster, removedProcessGroups, status, readyForInclusion, readyForInclusionUpdates)
278-
if err != nil {
279-
return err
280-
}
281-
280+
// In this step we will check if any of the "local" processes should be included.
281+
fdbProcessesToInclude, newProcessGroups := getProcessesToInclude(logger, cluster, removedProcessGroups, currentExclusions, readyForInclusion, readyForInclusionUpdates)
282282
if cluster.GetSynchronizationMode() == fdbv1beta2.SynchronizationModeGlobal {
283283
err = adminClient.UpdateReadyForInclusion(readyForInclusionUpdates)
284284
if err != nil {
@@ -303,6 +303,12 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
303303
return err
304304
}
305305

306+
// Make sure the inclusion are coordinated across multiple operator instances.
307+
err = r.takeLock(logger, cluster, "include removed process groups")
308+
if err != nil {
309+
return err
310+
}
311+
306312
if cluster.GetSynchronizationMode() == fdbv1beta2.SynchronizationModeGlobal {
307313
pendingForInclusion, err := adminClient.GetPendingForInclusion("")
308314
if err != nil {
@@ -318,12 +324,13 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
318324
if err != nil {
319325
return err
320326
}
321-
}
322327

323-
// Make sure the inclusion are coordinated across multiple operator instances.
324-
err = r.takeLock(logger, cluster, "include removed process groups")
325-
if err != nil {
326-
return err
328+
fdbProcessesToInclude, err = coordination.GetAddressesFromCoordinationState(logger, adminClient, readyForInclusion, true, true)
329+
if err != nil {
330+
return err
331+
}
332+
333+
fdbProcessesToInclude = filterAddressesToInclude(fdbProcessesToInclude, currentExclusions)
327334
}
328335

329336
r.Recorder.Event(cluster, corev1.EventTypeNormal, "IncludingProcesses", fmt.Sprintf("Including removed processes: %v", fdbProcessesToInclude))
@@ -340,17 +347,33 @@ func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationD
340347
return r.updateOrApply(ctx, cluster)
341348
}
342349

343-
func getProcessesToInclude(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus, readyForInclusion map[fdbv1beta2.ProcessGroupID]time.Time, readyForInclusionUpdates map[fdbv1beta2.ProcessGroupID]fdbv1beta2.UpdateAction) ([]fdbv1beta2.ProcessAddress, []*fdbv1beta2.ProcessGroupStatus, error) {
350+
// filterAddressesToInclude will remove all addresses that are part of the fdbProcessesToInclude slice but are not excluded in FDB itself.
351+
func filterAddressesToInclude(fdbProcessesToInclude []fdbv1beta2.ProcessAddress, excludedServers []fdbv1beta2.ProcessAddress) []fdbv1beta2.ProcessAddress {
352+
excludedServersMap := make(map[string]fdbv1beta2.None, len(excludedServers))
353+
for _, excludedServer := range excludedServers {
354+
excludedServersMap[excludedServer.String()] = fdbv1beta2.None{}
355+
}
356+
357+
result := make([]fdbv1beta2.ProcessAddress, 0, len(fdbProcessesToInclude))
358+
for _, addr := range fdbProcessesToInclude {
359+
_, ok := excludedServersMap[addr.String()]
360+
if !ok {
361+
continue
362+
}
363+
364+
result = append(result, addr)
365+
}
366+
367+
return result
368+
}
369+
370+
func getProcessesToInclude(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, excludedServers []fdbv1beta2.ProcessAddress, readyForInclusion map[fdbv1beta2.ProcessGroupID]time.Time, readyForInclusionUpdates map[fdbv1beta2.ProcessGroupID]fdbv1beta2.UpdateAction) ([]fdbv1beta2.ProcessAddress, []*fdbv1beta2.ProcessGroupStatus) {
344371
fdbProcessesToInclude := make([]fdbv1beta2.ProcessAddress, 0)
345372

346373
if len(removedProcessGroups) == 0 {
347-
return fdbProcessesToInclude, cluster.Status.ProcessGroups, nil
374+
return fdbProcessesToInclude, cluster.Status.ProcessGroups
348375
}
349376

350-
excludedServers, err := fdbstatus.GetExclusions(status)
351-
if err != nil {
352-
return fdbProcessesToInclude, nil, fmt.Errorf("unable to get excluded servers from status, %w", err)
353-
}
354377
excludedServersMap := make(map[string]fdbv1beta2.None, len(excludedServers))
355378
for _, excludedServer := range excludedServers {
356379
excludedServersMap[excludedServer.String()] = fdbv1beta2.None{}
@@ -359,6 +382,11 @@ func getProcessesToInclude(logger logr.Logger, cluster *fdbv1beta2.FoundationDBC
359382
processGroups := cluster.Status.DeepCopy().ProcessGroups
360383
idx := 0
361384
for _, processGroup := range processGroups {
385+
// Tester processes are not excluded, so there is no need to include them.
386+
if processGroup.ProcessClass == fdbv1beta2.ProcessClassTest {
387+
continue
388+
}
389+
362390
if processGroup.IsMarkedForRemoval() && removedProcessGroups[processGroup.ProcessGroupID] {
363391
foundInExcludedServerList := false
364392
exclusionString := processGroup.GetExclusionString()
@@ -374,7 +402,7 @@ func getProcessesToInclude(logger logr.Logger, cluster *fdbv1beta2.FoundationDBC
374402
for _, pAddr := range processGroup.Addresses {
375403
// Ensure we include the process address if the removed process group is a log process as we are always
376404
// excluding the log process with locality and IP address when validating that the log process was
377-
// fully excluded. Otherwise we might leave the IP address in the excluded list.
405+
// fully excluded. Otherwise, we might leave the IP address in the excluded list.
378406
if foundInExcludedServerList && processGroup.ProcessClass.IsLogProcess() {
379407
fdbProcessesToInclude = append(fdbProcessesToInclude, fdbv1beta2.ProcessAddress{IPAddress: net.ParseIP(pAddr)})
380408
continue
@@ -403,7 +431,7 @@ func getProcessesToInclude(logger logr.Logger, cluster *fdbv1beta2.FoundationDBC
403431
idx++
404432
}
405433

406-
return fdbProcessesToInclude, processGroups[:idx], nil
434+
return fdbProcessesToInclude, processGroups[:idx]
407435
}
408436

409437
func (r *FoundationDBClusterReconciler) getProcessGroupsToRemove(logger logr.Logger, cluster *fdbv1beta2.FoundationDBCluster, remainingMap map[string]bool, cordSet map[string]fdbv1beta2.None) (bool, bool, []*fdbv1beta2.ProcessGroupStatus) {

0 commit comments

Comments
 (0)