
Commit a4e350b

Add Final Resource Check to rmdata
Adds a final check to the rmdata process that verifies all resources have been deleted. If the final check fails, an error is returned and the Job fails. This allows users to monitor the Job's completion condition to verify that all resources have been successfully deleted.

Additionally, the pgcluster is now deleted last as part of the rmdata process (assuming the rmdata process was triggered via a pgtask, and not by deleting a pgcluster). This means the pgcluster remains visible in the environment until all other resources have been removed.

Please note that the DISABLE_FINAL_CHECK environment variable can be set to "true" to disable the new final-check behavior, essentially reverting to the previous behavior in which the Job might complete despite not all resources being fully deleted.

Finally, as of this commit, wait logic has been implemented for any/all Deployments and PVCs being deleted.

[sc-13106]
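Because Delete now returns an error and main exits non-zero via log.Fatalln, the Job reaches a terminal condition that callers can watch. A minimal sketch of such a check, assuming a client-go clientset and a hypothetical namespace and Job name (the operator generates the real rmdata Job name):

package rmdatawatch

import (
    "context"

    batchv1 "k8s.io/api/batch/v1"
    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
)

// jobFinished reports whether the named Job has reached a terminal condition.
// With the final resource check in place, JobFailed implies some cluster
// resources were not fully removed.
func jobFinished(ctx context.Context, clientset kubernetes.Interface,
    namespace, name string) (done, succeeded bool, err error) {
    job, err := clientset.BatchV1().Jobs(namespace).Get(ctx, name, metav1.GetOptions{})
    if err != nil {
        return false, false, err
    }
    for _, c := range job.Status.Conditions {
        if c.Status != corev1.ConditionTrue {
            continue
        }
        switch c.Type {
        case batchv1.JobComplete:
            return true, true, nil
        case batchv1.JobFailed:
            return true, false, nil
        }
    }
    return false, false, nil
}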

File tree (4 files changed: 211 additions, 41 deletions)

cmd/pgo-rmdata/main.go (+12 −1)
cmd/pgo-rmdata/process.go (+190 −40)
cmd/pgo-rmdata/types.go (+2 −0)
internal/kubeapi/client_config.go (+7 −0)

cmd/pgo-rmdata/main.go

12 additions, 1 deletion

@@ -61,8 +61,19 @@ func main() {
 
     request.Clientset = client
 
+    // create a dynamic client using the same REST config as the typed client
+    dynamicClient, err := kubeapi.NewDynamicClientForConfig(client.Config)
+    if err != nil {
+        log.Fatalln(err)
+    }
+
+    request.DynamicClient = dynamicClient
+
     log.Infoln("pgo-rmdata starts")
     log.Infof("request is %s", request.String())
 
-    Delete(request)
+    // if an error occurs while deleting, then exit with exit code 1
+    if err := Delete(request); err != nil {
+        log.Fatalln(err)
+    }
 }
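For context on why the dynamic client is introduced here: it lets a single client list any resource type by GroupVersionResource, which the final check in process.go relies on. A minimal sketch of that pattern in isolation, with the rest.Config source and label selector as assumptions:

package sketch

import (
    "context"

    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/runtime/schema"
    "k8s.io/client-go/dynamic"
    "k8s.io/client-go/rest"
)

// countByGVR lists any namespaced resource type by its GroupVersionResource
// and returns how many objects match the label selector.
func countByGVR(ctx context.Context, cfg *rest.Config, namespace, selector string,
    gvr schema.GroupVersionResource) (int, error) {
    client, err := dynamic.NewForConfig(cfg)
    if err != nil {
        return 0, err
    }
    list, err := client.Resource(gvr).Namespace(namespace).
        List(ctx, metav1.ListOptions{LabelSelector: selector})
    if err != nil {
        return 0, err
    }
    return len(list.Items), nil
}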

cmd/pgo-rmdata/process.go

190 additions, 40 deletions
@@ -19,15 +19,23 @@ import (
     "context"
     "errors"
     "fmt"
+    "os"
+    "strconv"
     "strings"
     "time"
 
     "github.com/crunchydata/postgres-operator/internal/config"
     "github.com/crunchydata/postgres-operator/internal/util"
+    crv1 "github.com/crunchydata/postgres-operator/pkg/apis/crunchydata.com/v1"
+    appsv1 "k8s.io/api/apps/v1"
+    batchv1 "k8s.io/api/batch/v1"
+    corev1 "k8s.io/api/core/v1"
 
     log "github.com/sirupsen/logrus"
     kerror "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/apimachinery/pkg/runtime/schema"
+    "k8s.io/apimachinery/pkg/util/wait"
 )
 
 const (
@@ -46,10 +54,13 @@ const (
     syncConfigMapSuffix = "sync"
 )
 
-func Delete(request Request) {
+func Delete(request Request) error {
     ctx := context.TODO()
     log.Infof("rmdata.Process %v", request)
 
+    // defines the number of PVCs & Secrets that should be retained
+    retainPVCCount, retainSecretCount := 0, 0
+
     // the case of 'pgo scaledown'
     if request.IsReplica {
         log.Info("rmdata.Process scaledown replica use case")
@@ -69,7 +80,7 @@ func Delete(request Request) {
         // is no longer a primary, and has become a replica.
         if !(request.ReplicaName == request.ClusterPGHAScope && kerror.IsNotFound(err)) {
             log.Error(err)
-            return
+            return nil
         }
         log.Debug("replica name matches PGHA scope, assuming scale down of original primary" +
             "and therefore ignoring error attempting to delete nonexistent pgreplica")
@@ -85,7 +96,7 @@ func Delete(request Request) {
     }
 
         // scale down is its own use case so we leave when done
-        return
+        return nil
     }
 
     if request.IsBackup {
@@ -96,30 +107,29 @@ func Delete(request Request) {
         removeLogicalBackupPVCs(request)
         // this is the special case of removing an ad hoc backup removal, so we can
         // exit here
-        return
+        return nil
     }
 
     log.Info("rmdata.Process cluster use case")
 
-    // attempt to delete the pgcluster object if it has not already been deleted.
-    // quite possibly, we are here because one deleted the pgcluster object
-    // already, so this step is optional
-    if _, err := request.Clientset.CrunchydataV1().Pgclusters(request.Namespace).Get(
-        ctx, request.ClusterName, metav1.GetOptions{}); err == nil {
-        if err := request.Clientset.CrunchydataV1().Pgclusters(request.Namespace).Delete(
-            ctx, request.ClusterName, metav1.DeleteOptions{}); err != nil {
-            log.Error(err)
-        }
-    }
-
     // clear out any of the scheduled jobs that may occur, as this would be
     // executing asynchronously against any stale data
     removeSchedules(request)
 
-    // the user had done something like:
-    // pgo delete cluster mycluster --delete-data
+    // Now remove any cluster secrets other than those for pgBackRest.
+    // Note that these secrets are only removed when Postgres data is also being deleted.
+    secretSelector := fmt.Sprintf("%s=%s,%s!=%s", config.LABEL_PG_CLUSTER, request.ClusterName,
+        config.LABEL_PGO_BACKREST_REPO, config.LABEL_TRUE)
+    secrets, err := request.Clientset.
+        CoreV1().Secrets(request.Namespace).
+        List(ctx, metav1.ListOptions{LabelSelector: secretSelector})
+    if err != nil {
+        log.Error(err)
+    }
     if request.RemoveData {
-        removeUserSecrets(request)
+        removeUserSecrets(request, secrets.Items)
+    } else {
+        retainSecretCount += len(secrets.Items)
     }
 
     // remove the cluster Deployments
@@ -129,15 +139,17 @@ func Delete(request Request) {
     removePgreplicas(request)
     removePgtasks(request)
     removeClusterConfigmaps(request)
-    // removeClusterJobs(request)
+
+    pvcList, err := getInstancePVCs(request)
+    if err != nil {
+        log.Error(err)
+    }
     if request.RemoveData {
-        if pvcList, err := getInstancePVCs(request); err != nil {
-            log.Error(err)
-        } else {
-            log.Debugf("rmdata pvc list: [%v]", pvcList)
+        log.Debugf("rmdata pvc list: [%v]", pvcList)
 
-            removePVCs(pvcList, request)
-        }
+        removePVCs(pvcList, request)
+    } else {
+        retainPVCCount += len(pvcList)
     }
 
     // backups have to be the last thing we remove. We want to ensure that all
@@ -158,9 +170,122 @@ func Delete(request Request) {
     if request.RemoveBackup {
         removeBackupSecrets(request)
         removeAllBackupPVCs(request)
+    } else {
+        retainPVCCount++
+        retainSecretCount++
     }
     // remove the bootstrap secret if present
     removeBootstrapSecret(request)
+
+    // Do not perform a final check if DISABLE_FINAL_CHECK is set to "true"
+    if disable, _ := strconv.ParseBool(os.Getenv("DISABLE_FINAL_CHECK")); !disable {
+        log.Info("Now verifying that all resources have been successfully removed")
+        // verify that all resources have been removed
+        if err := verifyResourcesDeleted(request, retainPVCCount,
+            retainSecretCount); err != nil {
+            return err
+        }
+    } else {
+        log.Info("Final resource check disabled")
+    }
+
+    // attempt to delete the pgcluster object if it has not already been deleted.
+    // quite possibly, we are here because one deleted the pgcluster object
+    // already, so this step is optional.
+    // please note that the pgcluster is deleted last to ensure all resources are
+    // successfully removed prior to deleting the pgcluster
+    if _, err := request.Clientset.CrunchydataV1().Pgclusters(request.Namespace).Get(
+        ctx, request.ClusterName, metav1.GetOptions{}); err == nil {
+        if err := request.Clientset.CrunchydataV1().Pgclusters(request.Namespace).Delete(
+            ctx, request.ClusterName, metav1.DeleteOptions{}); err != nil {
+            return err
+        }
+    }
+
+    return nil
+}
+
+// verifyResourcesDeleted performs a final check of the environment to verify that any resources
+// expected to be deleted by the rmdata process have indeed been deleted from the environment.
+func verifyResourcesDeleted(request Request, retainPVCCount, retainSecretCount int) error {
+    ctx := context.TODO()
+
+    // we check for all of the resources using the "pg-cluster" label
+    pgClusterSelector := config.LABEL_PG_CLUSTER + "=" + request.ClusterName
+    // we check for configMaps created by Patroni using the "crunchy-pgha-scope" label
+    pgHAScopeSelector := config.LABEL_PGHA_SCOPE + "=" + request.ClusterName
+
+    // create a slice containing the various resources we want to check for
+    pgClusterResources := make([]schema.GroupVersionResource, 9)
+    position := 0
+    addResource := func(resource schema.GroupVersionResource) {
+        pgClusterResources[position] = resource
+        position++
+    }
+    addResource(appsv1.SchemeGroupVersion.WithResource("deployments"))
+    addResource(batchv1.SchemeGroupVersion.WithResource("jobs"))
+    addResource(corev1.SchemeGroupVersion.WithResource("configmaps"))
+    addResource(corev1.SchemeGroupVersion.WithResource("persistentvolumeclaims"))
+    addResource(corev1.SchemeGroupVersion.WithResource("pods"))
+    addResource(corev1.SchemeGroupVersion.WithResource("secrets"))
+    addResource(corev1.SchemeGroupVersion.WithResource("services"))
+    addResource(crv1.SchemeGroupVersion.WithResource("pgreplicas"))
+    addResource(crv1.SchemeGroupVersion.WithResource("pgtasks"))
+
+    for _, resource := range pgClusterResources {
+
+        // The number of resources expected to be found for a specific resource. Typically we
+        // expect to find zero resources, but depending on data retention settings, this value
+        // might need to be adjusted.
+        expectedResourceCount := 0
+
+        // Depending on whether or not data and/or backups are being retained, we might expect to
+        // find PVCs and Secrets in the environment on a successful deletion. Therefore, adjust
+        // the number of expected resources based on the number of PVCs or Secrets we expect to
+        // find.
+        if resource.Resource == "persistentvolumeclaims" {
+            expectedResourceCount = retainPVCCount
+        } else if resource.Resource == "secrets" {
+            expectedResourceCount = retainSecretCount
+        }
+
+        // filter out the rmdata job
+        if resource.Resource == "jobs" {
+            pgClusterSelector += fmt.Sprintf(",%s!=%s", config.LABEL_RMDATA, config.LABEL_TRUE)
+        }
+
+        resources, err := request.DynamicClient.Resource(resource).Namespace(request.Namespace).
+            List(ctx, metav1.ListOptions{LabelSelector: pgClusterSelector})
+        if err != nil {
+            // if we cannot successfully list resources, then we also cannot verify that all
+            // resources have been deleted, so an error is returned
+            log.Error(err)
+            return err
+        }
+
+        // if more resources are found than expected, return an error
+        if len(resources.Items) > expectedResourceCount {
+            return fmt.Errorf("%s resources still exist", resources.GetKind())
+        }
+
+        // ConfigMaps created by Patroni need to be checked using a selector for
+        // 'crunchy-pgha-scope'. Therefore look for additional ConfigMaps using this
+        // label. If an error occurs when attempting to list ConfigMaps, or if any
+        // more ConfigMaps with this label are found, then return an error.
+        if resource.Resource == "configmaps" {
+            resources, err := request.DynamicClient.Resource(resource).Namespace(request.Namespace).
+                List(ctx, metav1.ListOptions{LabelSelector: pgHAScopeSelector})
+            if err != nil {
+                log.Error(err)
+                return err
+            }
+            if len(resources.Items) > 0 {
+                return fmt.Errorf("%s resources still exist", resources.GetKind())
+            }
+        }
+    }
+
+    return nil
 }
 
 // removeBackRestRepo removes the pgBackRest repo that is associated with the
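Since the hunk above parses DISABLE_FINAL_CHECK with strconv.ParseBool, any value that function accepts ("true", "1", "t", and so on) disables the check. A hedged sketch of setting the variable on a Job container; the container and image names are placeholders, not the operator's actual Job spec:

package sketch

import corev1 "k8s.io/api/core/v1"

// rmdataContainer sketches a container spec with the final check disabled.
func rmdataContainer() corev1.Container {
    return corev1.Container{
        Name:  "rmdata",             // placeholder container name
        Image: "example/pgo-rmdata", // placeholder image
        Env: []corev1.EnvVar{
            // parsed via strconv.ParseBool in Delete above
            {Name: "DISABLE_FINAL_CHECK", Value: "true"},
        },
    }
}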
@@ -180,6 +305,16 @@ func removeBackrestRepo(request Request) {
         log.Error(err)
     }
 
+    if err := wait.Poll(time.Second, time.Minute, func() (bool, error) {
+        if _, err := request.Clientset.AppsV1().Deployments(request.Namespace).
+            Get(ctx, deploymentName, metav1.GetOptions{}); err == nil || !kerror.IsNotFound(err) {
+            return false, nil
+        }
+        return true, nil
+    }); err != nil {
+        log.Error("could not terminate the pgBackRest repo deployment")
+    }
+
     // delete the service for the backrest repo
     err = request.Clientset.
         CoreV1().Services(request.Namespace).
@@ -359,20 +494,10 @@ func removeReplica(request Request) error {
     return nil
 }
 
-func removeUserSecrets(request Request) {
+func removeUserSecrets(request Request, secrets []corev1.Secret) {
     ctx := context.TODO()
-    // get all that match pg-cluster=db
-    selector := config.LABEL_PG_CLUSTER + "=" + request.ClusterName
 
-    secrets, err := request.Clientset.
-        CoreV1().Secrets(request.Namespace).
-        List(ctx, metav1.ListOptions{LabelSelector: selector})
-    if err != nil {
-        log.Error(err)
-        return
-    }
-
-    for _, s := range secrets.Items {
+    for _, s := range secrets {
         if s.ObjectMeta.Labels[config.LABEL_PGO_BACKREST_REPO] == "" {
             err := request.Clientset.CoreV1().Secrets(request.Namespace).Delete(ctx, s.ObjectMeta.Name, metav1.DeleteOptions{})
             if err != nil {
@@ -389,9 +514,22 @@ func removeAddons(request Request) {
     pgbouncerDepName := request.ClusterName + "-pgbouncer"
 
     deletePropagation := metav1.DeletePropagationForeground
-    _ = request.Clientset.
-        AppsV1().Deployments(request.Namespace).
-        Delete(ctx, pgbouncerDepName, metav1.DeleteOptions{PropagationPolicy: &deletePropagation})
+    if err := request.Clientset.AppsV1().Deployments(request.Namespace).
+        Delete(ctx, pgbouncerDepName,
+            metav1.DeleteOptions{PropagationPolicy: &deletePropagation}); err != nil {
+        log.Error(err)
+        return
+    }
+
+    if err := wait.Poll(time.Second, time.Minute, func() (bool, error) {
+        if _, err := request.Clientset.AppsV1().Deployments(request.Namespace).
+            Get(ctx, pgbouncerDepName, metav1.GetOptions{}); err == nil || !kerror.IsNotFound(err) {
+            return false, nil
+        }
+        return true, nil
+    }); err != nil {
+        log.Error("could not terminate the pgBouncer deployment")
+    }
 
     // delete the service name=<clustername>-pgbouncer
@@ -578,6 +716,18 @@ func removePVCs(pvcList []string, request Request) {
             log.Error(err)
         }
     }
+
+    if err := wait.Poll(time.Second, time.Minute, func() (bool, error) {
+        for _, pvcName := range pvcList {
+            if _, err := request.Clientset.CoreV1().PersistentVolumeClaims(request.Namespace).
+                Get(ctx, pvcName, metav1.GetOptions{}); err == nil || !kerror.IsNotFound(err) {
+                return false, nil
+            }
+        }
+        return true, nil
+    }); err != nil {
+        log.Error("could not remove the cluster pvcs")
+    }
 }
 
 // removeBackupJobs removes any job associated with a backup. These include:
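The poll-until-NotFound pattern above now appears three times in this file (the pgBackRest repo Deployment, the pgBouncer Deployment, and the PVCs). An editorial sketch, not code from the commit, of how a shared helper could express it once:

package sketch

import (
    "time"

    kerror "k8s.io/apimachinery/pkg/api/errors"
    "k8s.io/apimachinery/pkg/util/wait"
)

// waitForGone polls once per second, for up to a minute, until the supplied
// getter returns a NotFound error, i.e. until the resource is gone.
func waitForGone(get func() error) error {
    return wait.Poll(time.Second, time.Minute, func() (bool, error) {
        if err := get(); err == nil || !kerror.IsNotFound(err) {
            return false, nil
        }
        return true, nil
    })
}

Each call site would then wrap its Get call in a closure, e.g. the pgBouncer check becomes waitForGone(func() error { _, err := request.Clientset.AppsV1().Deployments(request.Namespace).Get(ctx, pgbouncerDepName, metav1.GetOptions{}); return err }).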

cmd/pgo-rmdata/types.go

2 additions, 0 deletions

@@ -19,10 +19,12 @@ import (
     "fmt"
 
     "github.com/crunchydata/postgres-operator/internal/kubeapi"
+    "k8s.io/client-go/dynamic"
 )
 
 type Request struct {
     Clientset     kubeapi.Interface
+    DynamicClient dynamic.Interface
     RemoveData    bool
     RemoveBackup  bool
     IsBackup      bool

internal/kubeapi/client_config.go

7 additions, 0 deletions

@@ -16,6 +16,7 @@ package kubeapi
 */
 
 import (
+    "k8s.io/client-go/dynamic"
     "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/kubernetes/scheme"
     "k8s.io/client-go/rest"

@@ -101,3 +102,9 @@ func NewClientForConfig(config *rest.Config) (*Client, error) {
 
     return client, err
 }
+
+// NewDynamicClientForConfig returns a dynamic client that is created and configured using the
+// provided REST configuration.
+func NewDynamicClientForConfig(config *rest.Config) (dynamic.Interface, error) {
+    return dynamic.NewForConfig(config)
+}
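A hedged usage sketch for the new helper: building the rest.Config from a kubeconfig path via clientcmd (an assumption for illustration; pgo-rmdata itself reuses the typed client's config, as shown in main.go above, and the kubeapi package is internal to this repo):

package sketch

import (
    "github.com/crunchydata/postgres-operator/internal/kubeapi"
    "k8s.io/client-go/dynamic"
    "k8s.io/client-go/tools/clientcmd"
)

// dynamicFromKubeconfig builds a REST config from a kubeconfig file and
// returns a dynamic client created via the new helper.
func dynamicFromKubeconfig(path string) (dynamic.Interface, error) {
    cfg, err := clientcmd.BuildConfigFromFlags("", path)
    if err != nil {
        return nil, err
    }
    return kubeapi.NewDynamicClientForConfig(cfg)
}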
