diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index 4963a6456b..7b3576392f 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -22,12 +22,14 @@ import ( "fmt" "time" + "github.com/blang/semver/v4" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" @@ -305,6 +307,16 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // But we want to update the LaunchTemplate because an error in the LaunchTemplate may be blocking the ASG creation. return true, nil } + + canProceed, err := r.isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope, machinePoolScope) + if err != nil { + return true, err + } + if !canProceed { + machinePoolScope.Info("blocking the Launch Template update due to control plane k8s version skew") + return false, nil + } + return asgsvc.CanStartASGInstanceRefresh(machinePoolScope) } runPostLaunchTemplateUpdateOperation := func() error { @@ -442,6 +454,35 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP return ctrl.Result{}, nil } +// isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew checks if the control plane is being upgraded, in which case we shouldn't update the launch template. +func (r *AWSMachinePoolReconciler) isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope cloud.ClusterScoper, machinePoolScope *scope.MachinePoolScope) (bool, error) { + if machinePoolScope.Cluster.Spec.ControlPlaneRef == nil { + return false, errors.New("failed to find ControlPlane: cluster.spec.controlPlaneRef is empty") + } + + controlPlane, err := clusterScope.UnstructuredControlPlane() + if err != nil { + return false, errors.Wrap(err, "failed to get ControlPlane") + } + + cpVersion, found, err := unstructured.NestedString(controlPlane.Object, "status", "version") + if !found || err != nil { + return false, errors.Wrapf(err, "failed to get version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name) + } + + controlPlaneCurrentK8sVersion, err := semver.ParseTolerant(cpVersion) + if err != nil { + return false, errors.Wrapf(err, "failed to parse version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name) + } + + machinePoolDesiredK8sVersion, err := semver.ParseTolerant(*machinePoolScope.MachinePool.Spec.Template.Spec.Version) + if err != nil { + return false, errors.Wrap(err, "failed to parse version of MachinePool") + } + + return controlPlaneCurrentK8sVersion.GE(machinePoolDesiredK8sVersion), nil +} + func (r *AWSMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { clusterScope.Info("Handling deleted AWSMachinePool") diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 71f25c86de..e6f9a88f0a 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -33,7 +33,9 @@ import ( "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" utilfeature "k8s.io/component-base/featuregate/testing" @@ -145,6 +147,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) { scope.MachinePoolScopeParams{ Client: testEnv.Client, Cluster: &clusterv1.Cluster{ + Spec: clusterv1.ClusterSpec{ + ControlPlaneRef: &corev1.ObjectReference{ + Name: "test", + Kind: "KubeadmControlPlane", + APIVersion: "controlplane.cluster.x-k8s.io/v1beta1", + Namespace: "default", + }, + }, Status: clusterv1.ClusterStatus{ InfrastructureReady: true, }, @@ -167,6 +177,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Bootstrap: clusterv1.Bootstrap{ DataSecretName: ptr.To[string]("bootstrap-data"), }, + // This version should be older or equal to the version used for the control plane, or workers won't be upgraded + Version: ptr.To[string]("1.30.0"), }, }, }, @@ -879,6 +891,65 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(err).To(Succeed()) }) + t.Run("launch template and ASG exist, but control plane k8s version is older than machinepool k8s version, should not update ASG", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + // Latest ID and version already stored, no need to retrieve it + ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting + ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") + // Set the MachinePool k8s version to a version that is newer than the control plane k8s version + ms.MachinePool.Spec.Template.Spec.Version = ptr.To[string]("99.99.99") + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any(), gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + // Mock changes to the launch template, ASG would be updated, but k8s version skew will prevent it + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(true, nil) + + // Control plane kubernetes version skew is checked before, so it won't be called + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Times(0) + // Won't be called due to version skew + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0) + // Won't be called due to version skew + asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()).Times(0) + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // Add differences to `AWSMachinePool.spec`, ASG would be updated, but k8s version skew will prevent it + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize - 1, + MaxSize: awsMachinePool.Spec.MaxSize + 1, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + // No upgrade + asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) + + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + // We expect an error because the workers use a k8s version newer than the control plane version + g.Expect(err).To(HaveOccurred()) + }) + t.Run("launch template and ASG created from zero, then bootstrap config reference changes", func(t *testing.T) { g := NewWithT(t) setup(t, g) @@ -1396,13 +1467,42 @@ func setupCluster(clusterName string) (*scope.ClusterScope, error) { ObjectMeta: metav1.ObjectMeta{Name: "test"}, Spec: infrav1.AWSClusterSpec{}, } - client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build() + controlPlane := &unstructured.Unstructured{} + controlPlane.Object = map[string]interface{}{ + "metadata": map[string]interface{}{ + "name": "test", + "namespace": "default", + }, + "spec": map[string]interface{}{}, + "status": map[string]interface{}{ + "version": "1.30.0", + }, + } + controlPlane.SetGroupVersionKind(schema.GroupVersionKind{ + Group: "controlplane.cluster.x-k8s.io", + Kind: "KubeadmControlPlane", + Version: "v1beta1", + }) + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build() + err := fakeClient.Create(context.Background(), controlPlane) + if err != nil { + return nil, err + } + return scope.NewClusterScope(scope.ClusterScopeParams{ Cluster: &clusterv1.Cluster{ ObjectMeta: metav1.ObjectMeta{Name: clusterName}, + Spec: clusterv1.ClusterSpec{ + ControlPlaneRef: &corev1.ObjectReference{ + Kind: "KubeadmControlPlane", + Namespace: "default", + Name: "test", + APIVersion: "controlplane.cluster.x-k8s.io/v1beta1", + }, + }, }, AWSCluster: awsCluster, - Client: client, + Client: fakeClient, }) } diff --git a/go.mod b/go.mod index 0971e1d1ce..e9688f7294 100644 --- a/go.mod +++ b/go.mod @@ -25,6 +25,7 @@ require ( github.com/aws/smithy-go v1.22.4 github.com/awslabs/goformation/v4 v4.19.5 github.com/blang/semver v3.5.1+incompatible + github.com/blang/semver/v4 v4.0.0 github.com/coreos/ignition v0.35.0 github.com/coreos/ignition/v2 v2.16.2 github.com/go-logr/logr v1.4.2 @@ -103,7 +104,6 @@ require ( github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/blang/semver/v4 v4.0.0 // indirect github.com/briandowns/spinner v1.11.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect