Skip to content

Commit 95f77b0

Browse files
authored
Clean up managed resources when disabled (#255)
* Reconciler now removes unused managed resources for the CWA collector
1 parent 313843f commit 95f77b0

File tree

3 files changed

+146
-6
lines changed

3 files changed

+146
-6
lines changed

controllers/amazoncloudwatchagent_controller.go

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@ import (
1111
appsv1 "k8s.io/api/apps/v1"
1212
corev1 "k8s.io/api/core/v1"
1313
apierrors "k8s.io/apimachinery/pkg/api/errors"
14+
"k8s.io/apimachinery/pkg/labels"
1415
"k8s.io/apimachinery/pkg/runtime"
16+
"k8s.io/apimachinery/pkg/types"
1517
"k8s.io/client-go/tools/record"
1618
ctrl "sigs.k8s.io/controller-runtime"
1719
"sigs.k8s.io/controller-runtime/pkg/client"
1820

1921
"github.com/aws/amazon-cloudwatch-agent-operator/apis/v1alpha1"
2022
"github.com/aws/amazon-cloudwatch-agent-operator/internal/config"
2123
"github.com/aws/amazon-cloudwatch-agent-operator/internal/manifests"
24+
"github.com/aws/amazon-cloudwatch-agent-operator/internal/manifests/manifestutils"
2225
collectorStatus "github.com/aws/amazon-cloudwatch-agent-operator/internal/status/collector"
2326
)
2427

@@ -40,6 +43,79 @@ type Params struct {
4043
Config config.Config
4144
}
4245

46+
func (r *AmazonCloudWatchAgentReconciler) findCloudWatchAgentOwnedObjects(ctx context.Context, owner v1alpha1.AmazonCloudWatchAgent) (map[types.UID]client.Object, error) {
47+
// Define a map to store the owned objects
48+
ownedObjects := make(map[types.UID]client.Object)
49+
selector := manifestutils.SelectorLabelsForAllOperatorManaged(owner.ObjectMeta)
50+
listOps := &client.ListOptions{
51+
Namespace: owner.Namespace,
52+
LabelSelector: labels.SelectorFromSet(selector),
53+
}
54+
// Define lists for different Kubernetes resources
55+
configMapList := &corev1.ConfigMapList{}
56+
serviceList := &corev1.ServiceList{}
57+
serviceAccountList := &corev1.ServiceAccountList{}
58+
deploymentList := &appsv1.DeploymentList{}
59+
statefulSetList := &appsv1.StatefulSetList{}
60+
daemonSetList := &appsv1.DaemonSetList{}
61+
var err error
62+
63+
// List ConfigMaps
64+
err = r.List(ctx, configMapList, listOps)
65+
if err != nil {
66+
return nil, err
67+
}
68+
for i := range configMapList.Items {
69+
ownedObjects[configMapList.Items[i].GetUID()] = &configMapList.Items[i]
70+
}
71+
72+
// List Services
73+
err = r.List(ctx, serviceList, listOps)
74+
if err != nil {
75+
return nil, err
76+
}
77+
for i := range serviceList.Items {
78+
ownedObjects[serviceList.Items[i].GetUID()] = &serviceList.Items[i]
79+
}
80+
// List ServiceAccounts
81+
err = r.List(ctx, serviceAccountList, listOps)
82+
if err != nil {
83+
return nil, err
84+
}
85+
for i := range serviceAccountList.Items {
86+
ownedObjects[serviceAccountList.Items[i].GetUID()] = &serviceAccountList.Items[i]
87+
}
88+
89+
// List Deployments
90+
err = r.List(ctx, deploymentList, listOps)
91+
if err != nil {
92+
return nil, err
93+
}
94+
for i := range deploymentList.Items {
95+
ownedObjects[deploymentList.Items[i].GetUID()] = &deploymentList.Items[i]
96+
}
97+
98+
// List StatefulSets
99+
err = r.List(ctx, statefulSetList, listOps)
100+
if err != nil {
101+
return nil, err
102+
}
103+
for i := range statefulSetList.Items {
104+
ownedObjects[statefulSetList.Items[i].GetUID()] = &statefulSetList.Items[i]
105+
}
106+
107+
// List DaemonSets
108+
err = r.List(ctx, daemonSetList, listOps)
109+
if err != nil {
110+
return nil, err
111+
}
112+
for i := range daemonSetList.Items {
113+
ownedObjects[daemonSetList.Items[i].GetUID()] = &daemonSetList.Items[i]
114+
}
115+
116+
return ownedObjects, nil
117+
118+
}
43119
func (r *AmazonCloudWatchAgentReconciler) getParams(instance v1alpha1.AmazonCloudWatchAgent) manifests.Params {
44120
return manifests.Params{
45121
Config: r.config,
@@ -108,7 +184,8 @@ func (r *AmazonCloudWatchAgentReconciler) Reconcile(ctx context.Context, req ctr
108184
if buildErr != nil {
109185
return ctrl.Result{}, buildErr
110186
}
111-
err := reconcileDesiredObjects(ctx, r.Client, log, &params.OtelCol, params.Scheme, desiredObjects...)
187+
188+
err := reconcileDesiredObjectsWPrune(ctx, r.Client, log, params.OtelCol, params.Scheme, desiredObjects, r.findCloudWatchAgentOwnedObjects)
112189
return collectorStatus.HandleReconcileStatus(ctx, log, params, err)
113190
}
114191

controllers/common.go

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
rbacv1 "k8s.io/api/rbac/v1"
1414
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1515
"k8s.io/apimachinery/pkg/runtime"
16+
"k8s.io/apimachinery/pkg/types"
1617
"k8s.io/client-go/util/retry"
1718
ctrl "sigs.k8s.io/controller-runtime"
1819
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -56,10 +57,12 @@ func BuildCollector(params manifests.Params) ([]client.Object, error) {
5657
}
5758
return resources, nil
5859
}
59-
60-
// reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
61-
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
60+
func reconcileDesiredObjectUIDs(ctx context.Context, kubeClient client.Client, logger logr.Logger,
61+
owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) (map[types.UID]client.Object, error) {
6262
var errs []error
63+
existingObjectMap := make(map[types.UID]client.Object)
64+
var existingObjectList []client.Object
65+
6366
for _, desired := range desiredObjects {
6467
l := logger.WithValues(
6568
"object_name", desired.GetName(),
@@ -76,6 +79,8 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
7679
// existing is an object the controller runtime will hydrate for us
7780
// we obtain the existing object by deep copying the desired object because it's the most convenient way
7881
existing := desired.DeepCopyObject().(client.Object)
82+
existingObjectList = append(existingObjectList, existing) //uid are not assigned yet
83+
7984
mutateFn := manifests.MutateFuncFor(existing, desired)
8085
var op controllerutil.OperationResult
8186
crudErr := retry.RetryOnConflict(retry.DefaultRetry, func() error {
@@ -87,7 +92,7 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
8792
l.Error(crudErr, "detected immutable field change, trying to delete, new object will be created on next reconcile", "existing", existing.GetName())
8893
delErr := kubeClient.Delete(ctx, existing)
8994
if delErr != nil {
90-
return delErr
95+
return nil, delErr
9196
}
9297
continue
9398
} else if crudErr != nil {
@@ -99,11 +104,61 @@ func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logg
99104
l.V(1).Info(fmt.Sprintf("desired has been %s", op))
100105
}
101106
if len(errs) > 0 {
102-
return fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
107+
return nil, fmt.Errorf("failed to create objects for %s: %w", owner.GetName(), errors.Join(errs...))
108+
}
109+
for _, obj := range existingObjectList {
110+
existingObjectMap[obj.GetUID()] = obj
111+
}
112+
return existingObjectMap, nil
113+
}
114+
115+
func reconcileDesiredObjectsWPrune(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner v1alpha1.AmazonCloudWatchAgent, scheme *runtime.Scheme,
116+
desiredObjects []client.Object,
117+
searchOwnedObjectsFunc func(ctx context.Context, owner v1alpha1.AmazonCloudWatchAgent) (map[types.UID]client.Object, error),
118+
) error {
119+
previouslyOwnedObjects, err := searchOwnedObjectsFunc(ctx, owner)
120+
if err != nil {
121+
return fmt.Errorf("failed to search owned objects: %w", err)
122+
}
123+
124+
desiredObjectMap, err := reconcileDesiredObjectUIDs(ctx, kubeClient, logger, &owner, scheme, desiredObjects...)
125+
126+
// Pruning owned objects in the cluster which are not should not be present after the reconciliation.
127+
err = pruneStaleObjects(ctx, kubeClient, logger, previouslyOwnedObjects, desiredObjectMap)
128+
if err != nil {
129+
return fmt.Errorf("failed to prune objects for %s: %w", owner.GetName(), err)
103130
}
104131
return nil
105132
}
106133

134+
// reconcileDesiredObjects runs the reconcile process using the mutateFn over the given list of objects.
135+
func reconcileDesiredObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, owner metav1.Object, scheme *runtime.Scheme, desiredObjects ...client.Object) error {
136+
_, err := reconcileDesiredObjectUIDs(ctx, kubeClient, logger, owner, scheme, desiredObjects...)
137+
return err
138+
}
139+
140+
func pruneStaleObjects(ctx context.Context, kubeClient client.Client, logger logr.Logger, previouslyOwnedMap, desiredMap map[types.UID]client.Object) error {
141+
// Pruning owned objects in the cluster which should not be present after the reconciliation.
142+
var pruneErrs []error
143+
for uid, obj := range previouslyOwnedMap {
144+
l := logger.WithValues(
145+
"object_name", obj.GetName(),
146+
"object_kind", obj.GetObjectKind().GroupVersionKind().Kind,
147+
)
148+
if _, found := desiredMap[uid]; found {
149+
continue
150+
}
151+
152+
l.Info("pruning unmanaged resource")
153+
err := kubeClient.Delete(ctx, obj)
154+
if err != nil {
155+
l.Error(err, "failed to delete resource")
156+
pruneErrs = append(pruneErrs, err)
157+
}
158+
}
159+
return errors.Join(pruneErrs...)
160+
}
161+
107162
func enabledAcceleratedComputeByAgentConfig(ctx context.Context, c client.Client, log logr.Logger) bool {
108163
agentResource := getAmazonCloudWatchAgentResource(ctx, c)
109164
// missing feature flag means it's on by default

internal/manifests/manifestutils/labels.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,11 @@ func SelectorLabels(instance metav1.ObjectMeta, component string) map[string]str
7272
"app.kubernetes.io/component": component,
7373
}
7474
}
75+
76+
func SelectorLabelsForAllOperatorManaged(instance metav1.ObjectMeta) map[string]string {
77+
return map[string]string{
78+
"app.kubernetes.io/managed-by": "amazon-cloudwatch-agent-operator",
79+
"app.kubernetes.io/instance": naming.Truncate("%s.%s", 63, instance.Namespace, instance.Name),
80+
"app.kubernetes.io/part-of": "amazon-cloudwatch-agent",
81+
}
82+
}

0 commit comments

Comments
 (0)