Skip to content

Commit a7f54f8

Browse files
feat(clusterctl): block move when Cluster or ClusterClass is paused
Add checks that ensure clusters and cluster classes are not paused before starting move operations. Signed-off-by: alexandre.vilain <[email protected]>
1 parent 7a28275 commit a7f54f8

File tree

5 files changed

+167
-10
lines changed

5 files changed

+167
-10
lines changed

cmd/clusterctl/client/cluster/mover.go

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import (
4141
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
4242
clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3"
4343
logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
44+
"sigs.k8s.io/cluster-api/util/annotations"
4445
"sigs.k8s.io/cluster-api/util/conditions"
4546
"sigs.k8s.io/cluster-api/util/patch"
4647
"sigs.k8s.io/cluster-api/util/yaml"
@@ -232,8 +233,7 @@ func (o *objectMover) checkProvisioningCompleted(ctx context.Context, graph *obj
232233
// Checking all the clusters have infrastructure is ready
233234
readClusterBackoff := newReadBackoff()
234235
clusters := graph.getClusters()
235-
for i := range clusters {
236-
cluster := clusters[i]
236+
for _, cluster := range clusters {
237237
clusterObj := &clusterv1.Cluster{}
238238
if err := retryWithExponentialBackoff(ctx, readClusterBackoff, func(ctx context.Context) error {
239239
return getClusterObj(ctx, o.fromProxy, cluster, clusterObj)
@@ -297,6 +297,25 @@ func getClusterObj(ctx context.Context, proxy Proxy, cluster *node, clusterObj *
297297
return nil
298298
}
299299

300+
// getClusterClassObj retrieves the clusterClassObj corresponding to a node with type ClusterClass.
301+
func getClusterClassObj(ctx context.Context, proxy Proxy, clusterClass *node, clusterClassObj *clusterv1.ClusterClass) error {
302+
c, err := proxy.NewClient(ctx)
303+
if err != nil {
304+
return err
305+
}
306+
307+
clusterClassObjKey := client.ObjectKey{
308+
Namespace: clusterClass.identity.Namespace,
309+
Name: clusterClass.identity.Name,
310+
}
311+
312+
if err := c.Get(ctx, clusterClassObjKey, clusterClassObj); err != nil {
313+
return errors.Wrapf(err, "error reading ClusterClass %s/%s",
314+
clusterClass.identity.Namespace, clusterClass.identity.Name)
315+
}
316+
return nil
317+
}
318+
300319
// getMachineObj retrieves the machineObj corresponding to a node with type Machine.
301320
func getMachineObj(ctx context.Context, proxy Proxy, machine *node, machineObj *clusterv1.Machine) error {
302321
c, err := proxy.NewClient(ctx)
@@ -320,9 +339,17 @@ func (o *objectMover) move(ctx context.Context, graph *objectGraph, toProxy Prox
320339
log := logf.Log
321340

322341
clusters := graph.getClusters()
342+
if err := checkClustersNotPaused(ctx, o.fromProxy, clusters); err != nil {
343+
return err
344+
}
345+
323346
log.Info("Moving Cluster API objects", "Clusters", len(clusters))
324347

325348
clusterClasses := graph.getClusterClasses()
349+
if err := checkClusterClassesNotPaused(ctx, o.fromProxy, clusterClasses); err != nil {
350+
return err
351+
}
352+
326353
log.Info("Moving Cluster API objects", "ClusterClasses", len(clusterClasses))
327354

328355
// Sets the pause field on the Cluster object in the source management cluster, so the controllers stop reconciling it.
@@ -395,9 +422,17 @@ func (o *objectMover) toDirectory(ctx context.Context, graph *objectGraph, direc
395422
log := logf.Log
396423

397424
clusters := graph.getClusters()
425+
if err := checkClustersNotPaused(ctx, o.fromProxy, clusters); err != nil {
426+
return err
427+
}
428+
398429
log.Info("Starting move of Cluster API objects", "Clusters", len(clusters))
399430

400431
clusterClasses := graph.getClusterClasses()
432+
if err := checkClusterClassesNotPaused(ctx, o.fromProxy, clusterClasses); err != nil {
433+
return err
434+
}
435+
401436
log.Info("Moving Cluster API objects", "ClusterClasses", len(clusterClasses))
402437

403438
// Sets the pause field on the Cluster object in the source management cluster, so the controllers stop reconciling it.
@@ -570,8 +605,7 @@ func setClusterPause(ctx context.Context, proxy Proxy, clusters []*node, value b
570605
patch := client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf("{\"spec\":{\"paused\":%s}}", patchValue)))
571606

572607
setClusterPauseBackoff := newWriteBackoff()
573-
for i := range clusters {
574-
cluster := clusters[i]
608+
for _, cluster := range clusters {
575609
log.V(5).Info("Set Cluster.Spec.Paused", "paused", value, "Cluster", klog.KRef(cluster.identity.Namespace, cluster.identity.Name))
576610

577611
// Nb. The operation is wrapped in a retry loop to make setClusterPause more resilient to unexpected conditions.
@@ -593,8 +627,7 @@ func setClusterClassPause(ctx context.Context, proxy Proxy, clusterclasses []*no
593627
log := logf.Log
594628

595629
setClusterClassPauseBackoff := newWriteBackoff()
596-
for i := range clusterclasses {
597-
clusterclass := clusterclasses[i]
630+
for _, clusterclass := range clusterclasses {
598631
if pause {
599632
log.V(5).Info("Set Paused annotation", "ClusterClass", clusterclass.identity.Name, "Namespace", clusterclass.identity.Namespace)
600633
} else {
@@ -611,6 +644,38 @@ func setClusterClassPause(ctx context.Context, proxy Proxy, clusterclasses []*no
611644
return nil
612645
}
613646

647+
// checkClustersNotPaused checks that no cluster in the graph is paused before proceeding.
648+
func checkClustersNotPaused(ctx context.Context, proxy Proxy, clusters []*node) error {
649+
for _, cluster := range clusters {
650+
clusterObj := &clusterv1.Cluster{}
651+
if err := getClusterObj(ctx, proxy, cluster, clusterObj); err != nil {
652+
return err
653+
}
654+
655+
if ptr.Deref(clusterObj.Spec.Paused, false) || annotations.HasPaused(clusterObj) {
656+
return errors.Errorf("cannot start operation while Cluster %s/%s is paused", clusterObj.Namespace, clusterObj.Name)
657+
}
658+
}
659+
660+
return nil
661+
}
662+
663+
// checkClusterClassesNotPaused checks that no clusterClass in the graph is paused before proceeding.
664+
func checkClusterClassesNotPaused(ctx context.Context, proxy Proxy, clusterClasses []*node) error {
665+
for _, clusterClass := range clusterClasses {
666+
clusterClassObj := &clusterv1.ClusterClass{}
667+
if err := getClusterClassObj(ctx, proxy, clusterClass, clusterClassObj); err != nil {
668+
return err
669+
}
670+
671+
if annotations.HasPaused(clusterClassObj) {
672+
return errors.Errorf("cannot start operation while ClusterClass %s/%s is paused", clusterClassObj.Namespace, clusterClassObj.Name)
673+
}
674+
}
675+
676+
return nil
677+
}
678+
614679
func waitReadyForMove(ctx context.Context, proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error {
615680
if dryRun {
616681
return nil
@@ -723,7 +788,8 @@ func pauseClusterClass(ctx context.Context, proxy Proxy, n *node, pause bool, mu
723788
ObjectMeta: metav1.ObjectMeta{
724789
Name: n.identity.Name,
725790
Namespace: n.identity.Namespace,
726-
}}, mutators...)
791+
},
792+
}, mutators...)
727793
if err != nil {
728794
return err
729795
}
@@ -1173,7 +1239,6 @@ func (o *objectMover) deleteGroup(ctx context.Context, group moveGroup) error {
11731239
err := retryWithExponentialBackoff(ctx, deleteSourceObjectBackoff, func(ctx context.Context) error {
11741240
return o.deleteSourceObject(ctx, nodeToDelete)
11751241
})
1176-
11771242
if err != nil {
11781243
errList = append(errList, err)
11791244
}

cmd/clusterctl/client/cluster/mover_test.go

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,40 @@ var moveTests = []struct {
9898
},
9999
wantErr: false,
100100
},
101+
{
102+
name: "Paused Cluster",
103+
fields: moveTestsFields{
104+
objs: test.NewFakeCluster("ns1", "foo").WithPaused().Objs(),
105+
},
106+
wantMoveGroups: [][]string{
107+
{ // group 1
108+
clusterv1.GroupVersion.String() + ", Kind=Cluster, ns1/foo",
109+
},
110+
{ // group 2 (objects with ownerReferences in group 1)
111+
// owned by Clusters
112+
"/v1, Kind=Secret, ns1/foo-ca",
113+
"/v1, Kind=Secret, ns1/foo-kubeconfig",
114+
clusterv1.GroupVersionInfrastructure.String() + ", Kind=GenericInfrastructureCluster, ns1/foo",
115+
},
116+
},
117+
wantErr: true,
118+
},
119+
{
120+
name: "Paused ClusterClass",
121+
fields: moveTestsFields{
122+
objs: test.NewFakeClusterClass("ns1", "class1").WithPaused().Objs(),
123+
},
124+
wantMoveGroups: [][]string{
125+
{ // group 1
126+
clusterv1.GroupVersion.String() + ", Kind=ClusterClass, ns1/class1",
127+
},
128+
{ // group 2
129+
clusterv1.GroupVersionInfrastructure.String() + ", Kind=GenericInfrastructureClusterTemplate, ns1/class1",
130+
clusterv1.GroupVersionControlPlane.String() + ", Kind=GenericControlPlaneTemplate, ns1/class1",
131+
},
132+
},
133+
wantErr: true,
134+
},
101135
{
102136
name: "Cluster with cloud config secret with the force move label",
103137
fields: moveTestsFields{
@@ -923,8 +957,29 @@ func Test_objectMover_restoreTargetObject(t *testing.T) {
923957
}
924958

925959
func Test_objectMover_toDirectory(t *testing.T) {
926-
// NB. we are testing the move and move sequence using the same set of moveTests, but checking the results at different stages of the move process
927-
for _, tt := range backupRestoreTests {
960+
tests := []struct {
961+
name string
962+
fields moveTestsFields
963+
files map[string]string
964+
wantErr bool
965+
}{
966+
{
967+
name: "Cluster is paused",
968+
fields: moveTestsFields{
969+
objs: test.NewFakeCluster("ns1", "foo").WithPaused().Objs(),
970+
},
971+
wantErr: true,
972+
},
973+
{
974+
name: "ClusterClass is paused",
975+
fields: moveTestsFields{
976+
objs: test.NewFakeClusterClass("ns1", "foo").WithPaused().Objs(),
977+
},
978+
wantErr: true,
979+
},
980+
}
981+
tests = append(tests, backupRestoreTests...)
982+
for _, tt := range tests {
928983
t.Run(tt.name, func(t *testing.T) {
929984
g := NewWithT(t)
930985

cmd/clusterctl/internal/test/fake_objects.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import (
4848
type FakeCluster struct {
4949
namespace string
5050
name string
51+
paused bool
5152
controlPlane *FakeControlPlane
5253
machinePools []*FakeMachinePool
5354
machineDeployments []*FakeMachineDeployment
@@ -117,6 +118,11 @@ func (f *FakeCluster) WithTopologyClassNamespace(namespace string) *FakeCluster
117118
return f
118119
}
119120

121+
// WithPaused marks the FakeCluster as paused; Objs will then set
// Spec.Paused to true on the generated Cluster object.
func (f *FakeCluster) WithPaused() *FakeCluster {
	f.paused = true
	return f
}
125+
120126
func (f *FakeCluster) Objs() []client.Object {
121127
clusterInfrastructure := &fakeinfrastructure.GenericInfrastructureCluster{
122128
TypeMeta: metav1.TypeMeta{
@@ -161,6 +167,10 @@ func (f *FakeCluster) Objs() []client.Object {
161167
}
162168
}
163169

170+
if f.paused {
171+
cluster.Spec.Paused = ptr.To(true)
172+
}
173+
164174
// Ensure the cluster gets a UID to be used by dependant objects for creating OwnerReferences.
165175
setUID(cluster)
166176

@@ -1486,6 +1496,7 @@ func FakeCRDList() []*apiextensionsv1.CustomResourceDefinition {
14861496
type FakeClusterClass struct {
14871497
namespace string
14881498
name string
1499+
paused bool
14891500
infrastructureClusterTemplate *unstructured.Unstructured
14901501
controlPlaneTemplate *unstructured.Unstructured
14911502
controlPlaneInfrastructureMachineTemplate *unstructured.Unstructured
@@ -1519,6 +1530,11 @@ func (f *FakeClusterClass) WithWorkerMachineDeploymentClasses(classes []*FakeMac
15191530
return f
15201531
}
15211532

1533+
// WithPaused marks the FakeClusterClass as paused; Objs will then add
// the clusterv1.PausedAnnotation to the generated ClusterClass.
func (f *FakeClusterClass) WithPaused() *FakeClusterClass {
	f.paused = true
	return f
}
1537+
15221538
func (f *FakeClusterClass) Objs() []client.Object {
15231539
// objMap map where the key is the object to which the owner reference to the cluster class should be added
15241540
// and the value dictates if the owner ref needs to be added.
@@ -1546,6 +1562,10 @@ func (f *FakeClusterClass) Objs() []client.Object {
15461562
objMap[f.controlPlaneInfrastructureMachineTemplate] = true
15471563
}
15481564

1565+
if f.paused {
1566+
clusterClassBuilder.WithAnnotations(map[string]string{clusterv1.PausedAnnotation: "true"})
1567+
}
1568+
15491569
if len(f.workerMachineDeploymentClasses) > 0 {
15501570
mdClasses := []clusterv1.MachineDeploymentClass{}
15511571
for _, fakeMDClass := range f.workerMachineDeploymentClasses {

util/test/builder/builders.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ func (m *MachinePoolTopologyBuilder) Build() clusterv1.MachinePoolTopology {
342342
type ClusterClassBuilder struct {
343343
namespace string
344344
name string
345+
annotations map[string]string
345346
infrastructureClusterTemplate *unstructured.Unstructured
346347
controlPlaneMetadata *clusterv1.ObjectMeta
347348
controlPlaneReadinessGates []clusterv1.MachineReadinessGate
@@ -370,6 +371,12 @@ func ClusterClass(namespace, name string) *ClusterClassBuilder {
370371
}
371372
}
372373

374+
// WithAnnotations adds the passed annotations to the ClusterClassBuilder.
// Note: the map is assigned as-is, replacing (not merging with) any
// annotations set by a previous call.
func (c *ClusterClassBuilder) WithAnnotations(annotations map[string]string) *ClusterClassBuilder {
	c.annotations = annotations
	return c
}
379+
373380
// WithInfrastructureClusterTemplate adds the passed InfrastructureClusterTemplate to the ClusterClassBuilder.
374381
func (c *ClusterClassBuilder) WithInfrastructureClusterTemplate(t *unstructured.Unstructured) *ClusterClassBuilder {
375382
c.infrastructureClusterTemplate = t
@@ -502,6 +509,9 @@ func (c *ClusterClassBuilder) Build() *clusterv1.ClusterClass {
502509
Variables: c.statusVariables,
503510
},
504511
}
512+
if c.annotations != nil {
513+
obj.Annotations = c.annotations
514+
}
505515
if c.conditions != nil {
506516
obj.Status.Conditions = c.conditions
507517
}

util/test/builder/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)