Skip to content

Commit 390549a

Browse files
authored
Merge pull request #8690 from nojnhuh/wait-pause
✨ block move with annotation
2 parents 62d734c + 29bb47d commit 390549a

File tree

7 files changed

+150
-1
lines changed

7 files changed

+150
-1
lines changed

cmd/clusterctl/api/v1alpha3/annotations.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,14 @@ const (
3131
//
3232
// It will help any validation webhook to take decision based on it.
3333
DeleteForMoveAnnotation = "clusterctl.cluster.x-k8s.io/delete-for-move"
34+
35+
// BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one
36+
// of the objects in scope.
37+
// Provider controllers are expected to set the annotation on resources that cannot be instantaneously
38+
// paused and remove the annotation when the resource has been actually paused.
39+
//
40+
// e.g. If this annotation is defined with any value on an InfraMachine resource to be moved when
41+
// `clusterctl move` is invoked, then NO resources for ANY workload cluster will be created on the
42+
// destination management cluster until the annotation is removed.
43+
BlockMoveAnnotation = "clusterctl.cluster.x-k8s.io/block-move"
3444
)

cmd/clusterctl/client/cluster/mover.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"os"
2323
"path/filepath"
24+
"time"
2425

2526
"github.com/pkg/errors"
2627
corev1 "k8s.io/api/core/v1"
@@ -32,6 +33,7 @@ import (
3233
kerrors "k8s.io/apimachinery/pkg/util/errors"
3334
"k8s.io/apimachinery/pkg/util/sets"
3435
"k8s.io/apimachinery/pkg/util/version"
36+
"k8s.io/apimachinery/pkg/util/wait"
3537
"k8s.io/klog/v2"
3638
"sigs.k8s.io/controller-runtime/pkg/client"
3739

@@ -333,6 +335,19 @@ func (o *objectMover) move(graph *objectGraph, toProxy Proxy, mutators ...Resour
333335
return errors.Wrap(err, "error pausing ClusterClasses")
334336
}
335337

338+
log.Info("Waiting for all resources to be ready to move")
339+
// exponential backoff configuration which returns durations for a total time of ~2m.
340+
// Example: 0, 5s, 8s, 11s, 17s, 26s, 38s, 57s, 86s, 128s
341+
waitForMoveUnblockedBackoff := wait.Backoff{
342+
Duration: 5 * time.Second,
343+
Factor: 1.5,
344+
Steps: 10,
345+
Jitter: 0.1,
346+
}
347+
if err := waitReadyForMove(o.fromProxy, graph.getMoveNodes(), o.dryRun, waitForMoveUnblockedBackoff); err != nil {
348+
return errors.Wrap(err, "error waiting for resources to be ready to move")
349+
}
350+
336351
// Nb. DO NOT call ensureNamespaces at this point because:
337352
// - namespace will be ensured to exist before creating the resource.
338353
// - If it's done here, we might create a namespace that can end up unused on target cluster (due to mutators).
@@ -595,6 +610,55 @@ func setClusterClassPause(proxy Proxy, clusterclasses []*node, pause bool, dryRu
595610
return nil
596611
}
597612

613+
func waitReadyForMove(proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error {
614+
if dryRun {
615+
return nil
616+
}
617+
618+
log := logf.Log
619+
620+
c, err := proxy.NewClient()
621+
if err != nil {
622+
return errors.Wrap(err, "error creating client")
623+
}
624+
625+
for _, n := range nodes {
626+
obj := &metav1.PartialObjectMetadata{
627+
ObjectMeta: metav1.ObjectMeta{
628+
Name: n.identity.Name,
629+
Namespace: n.identity.Namespace,
630+
},
631+
TypeMeta: metav1.TypeMeta{
632+
APIVersion: n.identity.APIVersion,
633+
Kind: n.identity.Kind,
634+
},
635+
}
636+
key := client.ObjectKeyFromObject(obj)
637+
log := log.WithValues("apiVersion", obj.GroupVersionKind(), "resource", klog.KObj(obj))
638+
639+
blockLogged := false
640+
if err := retryWithExponentialBackoff(backoff, func() error {
641+
if err := c.Get(ctx, key, obj); err != nil {
642+
return errors.Wrapf(err, "error getting %s/%s", obj.GroupVersionKind(), key)
643+
}
644+
645+
if _, exists := obj.GetAnnotations()[clusterctlv1.BlockMoveAnnotation]; exists {
646+
if !blockLogged {
647+
log.Info(fmt.Sprintf("Move blocked by %s annotation, waiting for it to be removed", clusterctlv1.BlockMoveAnnotation))
648+
blockLogged = true
649+
}
650+
return errors.Errorf("resource is not ready to move: %s/%s", obj.GroupVersionKind(), key)
651+
}
652+
log.V(5).Info("Resource is ready to move")
653+
return nil
654+
}); err != nil {
655+
return err
656+
}
657+
}
658+
659+
return nil
660+
}
661+
598662
// patchCluster applies a patch to a node referring to a Cluster object.
599663
func patchCluster(proxy Proxy, n *node, patch client.Patch, mutators ...ResourceMutatorFunc) error {
600664
cFrom, err := proxy.NewClient()

cmd/clusterctl/client/cluster/mover_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package cluster
1818

1919
import (
20+
"context"
2021
"fmt"
2122
"os"
2223
"path/filepath"
@@ -29,7 +30,9 @@ import (
2930
apierrors "k8s.io/apimachinery/pkg/api/errors"
3031
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3132
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
33+
"k8s.io/apimachinery/pkg/types"
3234
"k8s.io/apimachinery/pkg/util/sets"
35+
"k8s.io/apimachinery/pkg/util/wait"
3336
"k8s.io/utils/pointer"
3437
"sigs.k8s.io/controller-runtime/pkg/client"
3538

@@ -2288,3 +2291,69 @@ func Test_deleteSourceObject(t *testing.T) {
22882291
})
22892292
}
22902293
}
2294+
2295+
func TestWaitReadyForMove(t *testing.T) {
2296+
tests := []struct {
2297+
name string
2298+
moveBlocked bool
2299+
wantErr bool
2300+
}{
2301+
{
2302+
name: "moving blocked cluster should fail",
2303+
moveBlocked: true,
2304+
wantErr: true,
2305+
},
2306+
{
2307+
name: "moving unblocked cluster should succeed",
2308+
moveBlocked: false,
2309+
wantErr: false,
2310+
},
2311+
}
2312+
2313+
for _, tt := range tests {
2314+
t.Run(tt.name, func(t *testing.T) {
2315+
g := NewWithT(t)
2316+
2317+
clusterName := "foo"
2318+
clusterNamespace := "ns1"
2319+
objs := test.NewFakeCluster(clusterNamespace, clusterName).Objs()
2320+
2321+
// Create an objectGraph bound a source cluster with all the CRDs for the types involved in the test.
2322+
graph := getObjectGraphWithObjs(objs)
2323+
2324+
if tt.moveBlocked {
2325+
c, err := graph.proxy.NewClient()
2326+
g.Expect(err).NotTo(HaveOccurred())
2327+
2328+
ctx := context.Background()
2329+
cluster := &clusterv1.Cluster{}
2330+
err = c.Get(ctx, types.NamespacedName{Namespace: clusterNamespace, Name: clusterName}, cluster)
2331+
g.Expect(err).NotTo(HaveOccurred())
2332+
anns := cluster.GetAnnotations()
2333+
if anns == nil {
2334+
anns = make(map[string]string)
2335+
}
2336+
anns[clusterctlv1.BlockMoveAnnotation] = "anything"
2337+
cluster.SetAnnotations(anns)
2338+
2339+
g.Expect(c.Update(ctx, cluster)).To(Succeed())
2340+
}
2341+
2342+
// Get all the types to be considered for discovery
2343+
g.Expect(getFakeDiscoveryTypes(graph)).To(Succeed())
2344+
2345+
// trigger discovery the content of the source cluster
2346+
g.Expect(graph.Discovery("")).To(Succeed())
2347+
2348+
backoff := wait.Backoff{
2349+
Steps: 1,
2350+
}
2351+
err := waitReadyForMove(graph.proxy, graph.getMoveNodes(), false, backoff)
2352+
if tt.wantErr {
2353+
g.Expect(err).To(HaveOccurred())
2354+
} else {
2355+
g.Expect(err).NotTo(HaveOccurred())
2356+
}
2357+
})
2358+
}
2359+
}

docs/book/src/clusterctl/commands/move.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ The discovery mechanism for determining the objects to be moved is in the [provi
3232

3333
Before moving a `Cluster`, clusterctl sets the `Cluster.Spec.Paused` field to `true` stopping
3434
the controllers from reconciling the workload cluster _in the source management cluster_.
35+
clusterctl will wait until the `clusterctl.cluster.x-k8s.io/block-move` annotation is not
36+
present on any resource targeted by the move operation.
3537

3638
The `Cluster` object created in the target management cluster instead will be actively reconciled as soon as the move
3739
process completes.

docs/book/src/clusterctl/provider-contract.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,9 @@ If moving some of excluded object is required, the provider authors should creat
469469
exact move sequence to be executed by the user.
470470

471471
Additionally, provider authors should be aware that `clusterctl move` assumes all the provider's Controllers respect the
472-
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification.
472+
`Cluster.Spec.Paused` field introduced in the v1alpha3 Cluster API specification. If a provider needs to perform extra work in response to a
473+
cluster being paused, `clusterctl move` can be blocked from creating any resources on the destination
474+
management cluster by annotating any resource to be moved with `clusterctl.cluster.x-k8s.io/block-move`.
473475

474476
<aside class="note warning">
475477

docs/book/src/developer/providers/migrations/v1.5-to-v1.6.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ maintainers of providers and consumers of our Go API.
3232

3333

3434
### Other
35+
- `clusterctl move` can be blocked temporarily by a provider when an object to be moved is annotated with `clusterctl.cluster.x-k8s.io/block-move`.
3536

3637

3738
### Suggested changes for providers

docs/book/src/reference/labels_and_annotations.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
| :--------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
2525
| clusterctl.cluster.x-k8s.io/skip-crd-name-preflight-check | Can be placed on provider CRDs, so that clusterctl doesn't emit an error if the CRD doesn't comply with Cluster APIs naming scheme. Only CRDs that are referenced by core Cluster API CRDs have to comply with the naming scheme. |
2626
| clusterctl.cluster.x-k8s.io/delete-for-move | DeleteForMoveAnnotation will be set to objects that are going to be deleted from the source cluster after being moved to the target cluster during the clusterctl move operation. It will help any validation webhook to take decision based on it. |
27+
| clusterctl.cluster.x-k8s.io/block-move | BlockMoveAnnotation prevents the cluster move operation from starting if it is defined on at least one of the objects in scope. Provider controllers are expected to set the annotation on resources that cannot be instantaneously paused and remove the annotation when the resource has been actually paused. |
2728
| unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. |
2829
| cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. |
2930
| cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. |

0 commit comments

Comments
 (0)