Skip to content

Commit d9c0404

Browse files
committed
Prevent re-including processes that are still in Terminating.
1 parent 430629e commit d9c0404

File tree

1 file changed: +14 −11 lines changed

controllers/remove_pods.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func (u RemovePods) Reconcile(r *FoundationDBClusterReconciler, context ctx.Cont
5252
}
5353

5454
r.Recorder.Event(cluster, "Normal", "RemovingProcesses", fmt.Sprintf("Removing pods: %v", processGroupsToRemove))
55+
removedProcessGroups := make(map[string]bool)
5556
allRemoved := true
5657
for _, id := range processGroupsToRemove {
5758

@@ -64,15 +65,17 @@ func (u RemovePods) Reconcile(r *FoundationDBClusterReconciler, context ctx.Cont
6465
if err != nil {
6566
return false, err
6667
}
67-
if !removed {
68+
if removed {
69+
removedProcessGroups[id] = true
70+
} else {
6871
allRemoved = false
6972
continue
7073
}
74+
}
7175

72-
err = includeInstance(r, context, cluster, id)
73-
if err != nil {
74-
return false, err
75-
}
76+
err := includeInstance(r, context, cluster, removedProcessGroups)
77+
if err != nil {
78+
return false, err
7679
}
7780

7881
return allRemoved, nil
@@ -134,7 +137,7 @@ func confirmPodRemoval(r *FoundationDBClusterReconciler, context ctx.Context, cl
134137
return false, err
135138
}
136139
if len(instances) == 1 {
137-
log.Info("Waiting for instance get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pod", instances[0].Metadata.Name)
140+
log.Info("Waiting for instance to get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pod", instances[0].Metadata.Name)
138141
return false, nil
139142
} else if len(instances) > 0 {
140143
return false, fmt.Errorf("Multiple pods found for cluster %s, instance ID %s", cluster.Name, instanceID)
@@ -146,7 +149,7 @@ func confirmPodRemoval(r *FoundationDBClusterReconciler, context ctx.Context, cl
146149
return false, err
147150
}
148151
if len(pods.Items) == 1 {
149-
log.Info("Waiting for pod get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pod", pods.Items[0].Name)
152+
log.Info("Waiting for pod to get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pod", pods.Items[0].Name)
150153
return false, nil
151154
} else if len(pods.Items) > 0 {
152155
return false, fmt.Errorf("Multiple pods found for cluster %s, instance ID %s", cluster.Name, instanceID)
@@ -158,7 +161,7 @@ func confirmPodRemoval(r *FoundationDBClusterReconciler, context ctx.Context, cl
158161
return false, err
159162
}
160163
if len(pvcs.Items) == 1 {
161-
log.Info("Waiting for volume claim get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pvc", pvcs.Items[0].Name)
164+
log.Info("Waiting for volume claim to get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "pvc", pvcs.Items[0].Name)
162165
return false, nil
163166
} else if len(pvcs.Items) > 0 {
164167
return false, fmt.Errorf("Multiple PVCs found for cluster %s, instance ID %s", cluster.Name, instanceID)
@@ -170,7 +173,7 @@ func confirmPodRemoval(r *FoundationDBClusterReconciler, context ctx.Context, cl
170173
return false, err
171174
}
172175
if len(services.Items) == 1 {
173-
log.Info("Waiting for service get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "service", services.Items[0].Name)
176+
log.Info("Waiting for service to get torn down", "namespace", cluster.Namespace, "cluster", cluster.Name, "instanceID", instanceID, "service", services.Items[0].Name)
174177
return false, nil
175178
} else if len(services.Items) > 0 {
176179
return false, fmt.Errorf("Multiple services found for cluster %s, instance ID %s", cluster.Name, instanceID)
@@ -179,7 +182,7 @@ func confirmPodRemoval(r *FoundationDBClusterReconciler, context ctx.Context, cl
179182
return true, nil
180183
}
181184

182-
func includeInstance(r *FoundationDBClusterReconciler, context ctx.Context, cluster *fdbtypes.FoundationDBCluster, instanceID string) error {
185+
func includeInstance(r *FoundationDBClusterReconciler, context ctx.Context, cluster *fdbtypes.FoundationDBCluster, removedProcessGroups map[string]bool) error {
183186
adminClient, err := r.AdminClientProvider(cluster, r)
184187
if err != nil {
185188
return err
@@ -192,7 +195,7 @@ func includeInstance(r *FoundationDBClusterReconciler, context ctx.Context, clus
192195

193196
processGroups := make([]*fdbtypes.ProcessGroupStatus, 0, len(cluster.Status.ProcessGroups))
194197
for _, processGroup := range cluster.Status.ProcessGroups {
195-
if processGroup.Remove {
198+
if processGroup.Remove && removedProcessGroups[processGroup.ProcessGroupID] {
196199
addresses = append(addresses, processGroup.Addresses...)
197200
hasStatusUpdate = true
198201
} else {

0 commit comments

Comments (0)