Skip to content

Commit 2de8dad

Browse files
authored
OCPBUGS-32592: Adjust controllers to handle backup and restore (#138)
* OCPBUGS-32592: Vendoring for patch
* OCPBUGS-32592: Adjust controllers to handle backup and restore. Handle the pause annotation in AgentCluster and AgentMachine. Label assisted-service CRs with a reference to their AgentCluster and/or AgentMachine so that they can be found again if the AgentCluster and/or AgentMachine are destroyed and restored.
1 parent d0f3139 commit 2de8dad

File tree

10 files changed

+875
-61
lines changed

10 files changed

+875
-61
lines changed

controllers/agentcluster_controller.go

Lines changed: 152 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,21 @@ import (
3535
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3636
"k8s.io/apimachinery/pkg/runtime"
3737
"k8s.io/apimachinery/pkg/types"
38+
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3839
clusterutilv1 "sigs.k8s.io/cluster-api/util"
40+
"sigs.k8s.io/cluster-api/util/patch"
3941
ctrl "sigs.k8s.io/controller-runtime"
4042
"sigs.k8s.io/controller-runtime/pkg/client"
43+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
4144
)
4245

4346
const (
4447
agentClusterDependenciesWaitTime = 5 * time.Second
48+
AgentClusterRefLabel = "agentClusterRef"
49+
)
50+
51+
var (
52+
agentClusterFinalizer = "agentcluster" + capiproviderv1.GroupVersion.Group + "/deprovision"
4553
)
4654

4755
// AgentClusterReconciler reconciles a AgentCluster object
@@ -66,16 +74,13 @@ type ControlPlane struct {
6674
//+kubebuilder:rbac:groups=extensions.hive.openshift.io,resources=agentclusterinstalls,verbs=get;list;watch;create;update;patch;delete
6775
//+kubebuilder:rbac:groups=hypershift.openshift.io,resources=hostedcontrolplanes,verbs=get;list;watch;
6876

69-
func (r *AgentClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
77+
func (r *AgentClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, rerr error) {
7078
log := r.Log.WithFields(
7179
logrus.Fields{
7280
"agent_cluster": req.Name,
7381
"agent_cluster_namespace": req.Namespace,
7482
})
7583

76-
defer func() {
77-
log.Info("AgentCluster Reconcile ended")
78-
}()
7984
log.Info("AgentCluster Reconcile start")
8085

8186
agentCluster := &capiproviderv1.AgentCluster{}
@@ -84,17 +89,54 @@ func (r *AgentClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request
8489
return ctrl.Result{}, client.IgnoreNotFound(err)
8590
}
8691

87-
// If the agentCluster has no reference to a ClusterDeployment, create one
92+
patchHelper, err := patch.NewHelper(agentCluster, r.Client)
93+
if err != nil {
94+
return ctrl.Result{}, err
95+
}
96+
defer func() {
97+
if rerr := patchHelper.Patch(ctx, agentCluster); rerr != nil {
98+
log.WithError(err).Errorf("failed patching AgentCluster")
99+
}
100+
log.Info("AgentCluster Reconcile ended")
101+
}()
102+
103+
if !agentCluster.DeletionTimestamp.IsZero() {
104+
if err = r.handleDeletion(ctx, agentCluster); err != nil {
105+
log.WithError(err).Errorf("failed to remove AgentCluster")
106+
return ctrl.Result{}, err
107+
}
108+
return ctrl.Result{}, nil
109+
}
110+
111+
if !controllerutil.ContainsFinalizer(agentCluster, agentClusterFinalizer) {
112+
controllerutil.AddFinalizer(agentCluster, agentClusterFinalizer)
113+
}
114+
115+
if paused := agentCluster.Annotations[clusterv1.PausedAnnotation]; paused == "true" {
116+
log.Info("Skipping reconcile of AgentCluster as it's paused, but orphan its resources")
117+
if err = r.orphanClusterDeployment(ctx, agentCluster); err != nil {
118+
return ctrl.Result{}, err
119+
}
120+
return ctrl.Result{}, nil
121+
}
122+
123+
// If the agentCluster has no reference to a ClusterDeployment, find or create one
88124
if agentCluster.Status.ClusterDeploymentRef.Name == "" {
89-
return r.createClusterDeployment(ctx, log, agentCluster)
125+
return r.findOrCreateClusterDeployment(ctx, log, agentCluster)
90126
}
127+
91128
clusterDeployment := &hivev1.ClusterDeployment{}
92-
err := r.Get(ctx, types.NamespacedName{Namespace: agentCluster.Status.ClusterDeploymentRef.Namespace, Name: agentCluster.Status.ClusterDeploymentRef.Name}, clusterDeployment)
93-
if err != nil {
129+
if err = r.Get(ctx, types.NamespacedName{Namespace: agentCluster.Status.ClusterDeploymentRef.Namespace, Name: agentCluster.Status.ClusterDeploymentRef.Name}, clusterDeployment); err != nil {
94130
log.WithError(err).Error("Failed to get ClusterDeployment")
95131
return ctrl.Result{}, err
96132
}
97133

134+
err = r.ensureOwnedClusterDeployment(ctx, agentCluster, clusterDeployment)
135+
if err != nil {
136+
log.WithError(err).Errorf("failed to ensure ClusterDeployment %s is owned by AgentCluster %s", clusterDeployment.Name, agentCluster.Name)
137+
return ctrl.Result{}, err
138+
}
139+
98140
err = r.ensureAgentClusterInstall(ctx, log, clusterDeployment, agentCluster)
99141
if err != nil {
100142
return ctrl.Result{}, err
@@ -175,6 +217,38 @@ func (r *AgentClusterReconciler) getControlPlane(ctx context.Context, log logrus
175217
return &controlPlane, nil
176218
}
177219

220+
func (r *AgentClusterReconciler) findOrCreateClusterDeployment(ctx context.Context, log logrus.FieldLogger, agentCluster *capiproviderv1.AgentCluster) (ctrl.Result, error) {
221+
clusterDeployment, err := r.findClusterDeployment(ctx, agentCluster)
222+
if err != nil {
223+
return ctrl.Result{}, err
224+
}
225+
if clusterDeployment != nil {
226+
log.Infof("Found previously created clusterDeployment referencing agentCluster %s. Re-adding agentCluster status to reference clusterDeployment %s", agentCluster.Name, clusterDeployment.Name)
227+
agentCluster.Status.ClusterDeploymentRef.Name = clusterDeployment.Name
228+
agentCluster.Status.ClusterDeploymentRef.Namespace = clusterDeployment.Namespace
229+
return ctrl.Result{}, nil
230+
}
231+
return r.createClusterDeployment(ctx, log, agentCluster)
232+
}
233+
234+
func (r *AgentClusterReconciler) findClusterDeployment(ctx context.Context, agentCluster *capiproviderv1.AgentCluster) (*hivev1.ClusterDeployment, error) {
235+
labelSelector := metav1.LabelSelector{MatchLabels: map[string]string{AgentClusterRefLabel: agentCluster.Name}}
236+
selector, err := metav1.LabelSelectorAsSelector(&labelSelector)
237+
if err != nil {
238+
return nil, err
239+
}
240+
241+
clusterDeployments := &hivev1.ClusterDeploymentList{}
242+
if err := r.Client.List(ctx, clusterDeployments, &client.ListOptions{LabelSelector: selector}); err != nil {
243+
return nil, err
244+
}
245+
246+
if len(clusterDeployments.Items) == 0 {
247+
return nil, nil
248+
}
249+
return &clusterDeployments.Items[0], nil
250+
}
251+
178252
func (r *AgentClusterReconciler) createClusterDeploymentObject(agentCluster *capiproviderv1.AgentCluster,
179253
controlPlane *ControlPlane) *hivev1.ClusterDeployment {
180254
var kubeadminPassword *corev1.LocalObjectReference
@@ -193,6 +267,9 @@ func (r *AgentClusterReconciler) createClusterDeploymentObject(agentCluster *cap
193267
Name: agentCluster.Name,
194268
UID: agentCluster.UID,
195269
}},
270+
Labels: map[string]string{
271+
AgentClusterRefLabel: agentCluster.Name,
272+
},
196273
},
197274
Spec: hivev1.ClusterDeploymentSpec{
198275
Installed: true,
@@ -232,7 +309,6 @@ func (r *AgentClusterReconciler) createClusterDeployment(ctx context.Context, lo
232309
clusterDeployment := r.createClusterDeploymentObject(agentCluster, controlPlane)
233310

234311
r.labelControlPlaneSecrets(ctx, controlPlane, agentCluster.Namespace)
235-
236312
agentCluster.Status.ClusterDeploymentRef.Name = clusterDeployment.Name
237313
agentCluster.Status.ClusterDeploymentRef.Namespace = clusterDeployment.Namespace
238314
if err = r.Client.Create(ctx, clusterDeployment); err != nil {
@@ -243,13 +319,31 @@ func (r *AgentClusterReconciler) createClusterDeployment(ctx context.Context, lo
243319
return ctrl.Result{}, err
244320
}
245321
}
246-
if err = r.Client.Status().Update(ctx, agentCluster); err != nil {
247-
log.WithError(err).Error("Failed to update status")
248-
return ctrl.Result{}, err
249-
}
250322
return ctrl.Result{}, nil
251323
}
252324

325+
// ensureOwnedClusterDeployment makes sure that the ClusterDeployment has its owner set to this AgentCluster
326+
// and that the ClusterDeployment has a label referencing this AgentCluster.
327+
func (r *AgentClusterReconciler) ensureOwnedClusterDeployment(ctx context.Context, agentCluster *capiproviderv1.AgentCluster, clusterDeployment *hivev1.ClusterDeployment) error {
328+
alreadyOwned := clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster)
329+
agentClusterRef := clusterDeployment.ObjectMeta.Labels[AgentClusterRefLabel]
330+
if alreadyOwned && agentClusterRef != "" && agentClusterRef == agentCluster.Name {
331+
return nil
332+
}
333+
patch := client.MergeFrom(clusterDeployment.DeepCopy())
334+
if err := controllerutil.SetOwnerReference(agentCluster, clusterDeployment, r.Scheme); err != nil {
335+
return err
336+
}
337+
if clusterDeployment.ObjectMeta.Labels == nil {
338+
clusterDeployment.ObjectMeta.Labels = make(map[string]string)
339+
}
340+
clusterDeployment.ObjectMeta.Labels[AgentClusterRefLabel] = agentCluster.Name
341+
if err := r.Client.Patch(ctx, clusterDeployment, patch); err != nil {
342+
return err
343+
}
344+
return nil
345+
}
346+
253347
func (r *AgentClusterReconciler) ensureAgentClusterInstall(ctx context.Context, log logrus.FieldLogger, clusterDeployment *hivev1.ClusterDeployment, agentCluster *capiproviderv1.AgentCluster) error {
254348
log.Info("Setting AgentClusterInstall")
255349
agentClusterInstall := &hiveext.AgentClusterInstall{}
@@ -348,3 +442,48 @@ func (r *AgentClusterReconciler) ensureSecretLabel(ctx context.Context, name, na
348442
r.Log.WithError(err).Warn("Failed labeling secret")
349443
}
350444
}
445+
446+
func (r *AgentClusterReconciler) handleDeletion(ctx context.Context, agentCluster *capiproviderv1.AgentCluster) error {
447+
if paused := agentCluster.Annotations[clusterv1.PausedAnnotation]; paused == "true" {
448+
// unset finalizer, remove owner from ClusterDeployment to orphan it and return
449+
if err := r.orphanClusterDeployment(ctx, agentCluster); err != nil {
450+
return err
451+
}
452+
}
453+
controllerutil.RemoveFinalizer(agentCluster, agentClusterFinalizer)
454+
return nil
455+
}
456+
457+
// orphanClusterDeployment removes this AgentCluster as the owner of this ClusterDeployment. This ensures that there's
458+
// no cascade deletion of the ClusterDeployment (and its AgentClusterInstall) if the AgentCluster is deleted.
459+
func (r *AgentClusterReconciler) orphanClusterDeployment(ctx context.Context, agentCluster *capiproviderv1.AgentCluster) error {
460+
if agentCluster.Status.ClusterDeploymentRef.Name == "" {
461+
return nil
462+
}
463+
clusterDeployment := &hivev1.ClusterDeployment{}
464+
if err := r.Get(ctx, types.NamespacedName{Namespace: agentCluster.Status.ClusterDeploymentRef.Namespace, Name: agentCluster.Status.ClusterDeploymentRef.Name}, clusterDeployment); err != nil {
465+
if apierrors.IsNotFound(err) {
466+
return nil
467+
}
468+
return err
469+
}
470+
471+
if !clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster) {
472+
return nil
473+
}
474+
475+
var newOwners []metav1.OwnerReference
476+
for _, owner := range clusterDeployment.GetOwnerReferences() {
477+
if owner.Kind == agentCluster.Kind && owner.Name == agentCluster.Name && owner.APIVersion == agentCluster.APIVersion && owner.UID == agentCluster.UID {
478+
continue
479+
}
480+
newOwners = append(newOwners, owner)
481+
}
482+
483+
patch := client.MergeFrom(clusterDeployment.DeepCopy())
484+
clusterDeployment.SetOwnerReferences(newOwners)
485+
if err := r.Patch(ctx, clusterDeployment, patch); err != nil {
486+
return err
487+
}
488+
return nil
489+
}

controllers/agentcluster_controller_test.go

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ import (
1818
"k8s.io/apimachinery/pkg/types"
1919
"k8s.io/client-go/kubernetes/scheme"
2020
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
21+
clusterutilv1 "sigs.k8s.io/cluster-api/util"
2122
ctrl "sigs.k8s.io/controller-runtime"
2223
"sigs.k8s.io/controller-runtime/pkg/client"
2324
fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake"
25+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2426
)
2527

2628
func init() {
@@ -229,8 +231,6 @@ var _ = Describe("agentcluster reconcile", func() {
229231
Expect(result).To(Equal(ctrl.Result{RequeueAfter: agentClusterDependenciesWaitTime}))
230232
})
231233
It("no control plane reference in cluster", func() {
232-
clusterName := "test-cluster-name"
233-
234234
cluster := newCluster(&types.NamespacedName{Name: clusterName, Namespace: testNamespace})
235235
cluster.Spec.ControlPlaneRef = nil
236236

@@ -292,6 +292,102 @@ var _ = Describe("agentcluster reconcile", func() {
292292
Expect(err).To(BeNil())
293293
Expect(result).To(Equal(ctrl.Result{RequeueAfter: agentClusterDependenciesWaitTime}))
294294
})
295+
Context("pausing agent cluster", func() {
296+
It("doesn't create a clusterDeployment when paused", func() {
297+
agentCluster := newAgentCluster("agentCluster-1", testNamespace, capiproviderv1.AgentClusterSpec{
298+
IgnitionEndpoint: &capiproviderv1.IgnitionEndpoint{Url: "https://1.2.3.4:555/ignition"},
299+
})
300+
agentCluster.ObjectMeta.Annotations = map[string]string{clusterv1.PausedAnnotation: "true"}
301+
Expect(c.Create(ctx, agentCluster)).To(BeNil())
302+
303+
result, err := acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
304+
Expect(err).To(BeNil())
305+
Expect(result).To(Equal(ctrl.Result{}))
306+
clusterDeployment := &hivev1.ClusterDeployment{}
307+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).NotTo(Succeed())
308+
})
309+
It("doesn't error finding non-existing clusterDeployment when paused", func() {
310+
agentCluster := newAgentCluster("agentCluster-1", testNamespace, capiproviderv1.AgentClusterSpec{
311+
IgnitionEndpoint: &capiproviderv1.IgnitionEndpoint{Url: "https://1.2.3.4:555/ignition"},
312+
})
313+
agentCluster.Status.ClusterDeploymentRef.Name = "missing-cluster-deployment-name"
314+
agentCluster.ObjectMeta.Annotations = map[string]string{clusterv1.PausedAnnotation: "true"}
315+
Expect(c.Create(ctx, agentCluster)).To(BeNil())
316+
317+
result, err := acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
318+
Expect(err).To(BeNil())
319+
Expect(result).To(Equal(ctrl.Result{}))
320+
})
321+
It("orphans its cluster deployment when paused", func() {
322+
agentCluster := createDefaultResources(ctx, c, clusterName, testNamespace, baseDomain, pullSecret, kubeconfig, kubeadminPassword)
323+
createClusterDeployment(c, ctx, agentCluster, clusterName, baseDomain, pullSecret)
324+
agentCluster.Status.Ready = true
325+
agentCluster.ObjectMeta.Annotations = map[string]string{clusterv1.PausedAnnotation: "true"}
326+
Expect(c.Update(ctx, agentCluster)).To(BeNil())
327+
328+
result, err := acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
329+
Expect(err).To(BeNil())
330+
Expect(result).To(Equal(ctrl.Result{}))
331+
332+
clusterDeployment := &hivev1.ClusterDeployment{}
333+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
334+
Expect(clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster)).To(BeFalse())
335+
})
336+
It("recovers its cluster deployment when unpaused", func() {
337+
agentCluster := createDefaultResources(ctx, c, clusterName, testNamespace, baseDomain, pullSecret, kubeconfig, kubeadminPassword)
338+
createClusterDeployment(c, ctx, agentCluster, clusterName, baseDomain, pullSecret)
339+
340+
clusterDeployment := &hivev1.ClusterDeployment{}
341+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
342+
Expect(controllerutil.SetOwnerReference(agentCluster, clusterDeployment, acr.Scheme)).To(Succeed())
343+
clusterDeployment.Labels = map[string]string{AgentClusterRefLabel: agentCluster.Name}
344+
Expect(c.Update(ctx, clusterDeployment)).To(Succeed())
345+
agentCluster.ObjectMeta.Annotations = map[string]string{clusterv1.PausedAnnotation: "true"}
346+
Expect(c.Update(ctx, agentCluster)).To(BeNil())
347+
348+
result, err := acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
349+
Expect(err).To(BeNil())
350+
Expect(result).To(Equal(ctrl.Result{}))
351+
352+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
353+
Expect(clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster)).To(BeFalse())
354+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, agentCluster)).To(Succeed())
355+
agentCluster.ObjectMeta.Annotations = nil
356+
Expect(c.Update(ctx, agentCluster)).To(BeNil())
357+
358+
result, err = acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
359+
Expect(err).To(BeNil())
360+
Expect(result).To(Equal(ctrl.Result{}))
361+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
362+
Expect(clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster)).To(BeTrue())
363+
})
364+
It("doesn't delete the cluster deployment when paused and agent cluster gets deleted", func() {
365+
agentCluster := createDefaultResources(ctx, c, clusterName, testNamespace, baseDomain, pullSecret, kubeconfig, kubeadminPassword)
366+
createClusterDeployment(c, ctx, agentCluster, clusterName, baseDomain, pullSecret)
367+
368+
clusterDeployment := &hivev1.ClusterDeployment{}
369+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
370+
Expect(controllerutil.SetOwnerReference(agentCluster, clusterDeployment, acr.Scheme)).To(Succeed())
371+
clusterDeployment.Labels = map[string]string{AgentClusterRefLabel: agentCluster.Name}
372+
Expect(c.Update(ctx, clusterDeployment)).To(Succeed())
373+
agentCluster.ObjectMeta.Annotations = map[string]string{clusterv1.PausedAnnotation: "true"}
374+
Expect(controllerutil.AddFinalizer(agentCluster, agentClusterFinalizer)).To(BeTrue())
375+
Expect(c.Update(ctx, agentCluster)).To(Succeed())
376+
377+
result, err := acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
378+
Expect(err).To(BeNil())
379+
Expect(result).To(Equal(ctrl.Result{}))
380+
381+
Expect(c.Delete(ctx, agentCluster)).To(Succeed())
382+
result, err = acr.Reconcile(ctx, newAgentClusterRequest(agentCluster))
383+
Expect(err).To(BeNil())
384+
Expect(result).To(Equal(ctrl.Result{}))
385+
386+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, clusterDeployment)).To(Succeed())
387+
Expect(clusterutilv1.IsOwnedByObject(clusterDeployment, agentCluster)).To(BeFalse())
388+
Expect(c.Get(ctx, types.NamespacedName{Name: agentCluster.Name, Namespace: testNamespace}, agentCluster)).NotTo(Succeed())
389+
})
390+
})
295391
})
296392

297393
func createClusterDeployment(c client.Client, ctx context.Context, agentCluster *capiproviderv1.AgentCluster, clusterName, baseDomain, pullSecretName string) {

0 commit comments

Comments
 (0)