Skip to content

✨ Check control plane version skew before creating new launch template version #5560

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions exp/controllers/awsmachinepool_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ import (
"fmt"
"time"

"github.com/blang/semver/v4"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
Expand Down Expand Up @@ -305,6 +307,16 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
// But we want to update the LaunchTemplate because an error in the LaunchTemplate may be blocking the ASG creation.
return true, nil
}

canProceed, err := r.isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope, machinePoolScope)
if err != nil {
return true, err
}
if !canProceed {
machinePoolScope.Info("blocking the Launch Template update due to control plane k8s version skew")
return false, nil
}

return asgsvc.CanStartASGInstanceRefresh(machinePoolScope)
}
runPostLaunchTemplateUpdateOperation := func() error {
Expand Down Expand Up @@ -442,6 +454,35 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
return ctrl.Result{}, nil
}

// isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew checks if the control plane is being upgraded, in which case we shouldn't update the launch template.
func (r *AWSMachinePoolReconciler) isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope cloud.ClusterScoper, machinePoolScope *scope.MachinePoolScope) (bool, error) {
if machinePoolScope.Cluster.Spec.ControlPlaneRef == nil {
return false, errors.New("failed to find ControlPlane: cluster.spec.controlPlaneRef is empty")
}

controlPlane, err := clusterScope.UnstructuredControlPlane()
if err != nil {
return false, errors.Wrap(err, "failed to get ControlPlane")
}

cpVersion, found, err := unstructured.NestedString(controlPlane.Object, "status", "version")
if !found || err != nil {
return false, errors.Wrapf(err, "failed to get version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name)
}

controlPlaneCurrentK8sVersion, err := semver.ParseTolerant(cpVersion)
if err != nil {
return false, errors.Wrapf(err, "failed to parse version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name)
}

machinePoolDesiredK8sVersion, err := semver.ParseTolerant(*machinePoolScope.MachinePool.Spec.Template.Spec.Version)
if err != nil {
return false, errors.Wrap(err, "failed to parse version of MachinePool")
}

return controlPlaneCurrentK8sVersion.GE(machinePoolDesiredK8sVersion), nil
}

func (r *AWSMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error {
clusterScope.Info("Handling deleted AWSMachinePool")

Expand Down
104 changes: 102 additions & 2 deletions exp/controllers/awsmachinepool_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ import (
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
apimachinerytypes "k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
utilfeature "k8s.io/component-base/featuregate/testing"
Expand Down Expand Up @@ -145,6 +147,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
scope.MachinePoolScopeParams{
Client: testEnv.Client,
Cluster: &clusterv1.Cluster{
Spec: clusterv1.ClusterSpec{
ControlPlaneRef: &corev1.ObjectReference{
Name: "test",
Kind: "KubeadmControlPlane",
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
Namespace: "default",
},
},
Status: clusterv1.ClusterStatus{
InfrastructureReady: true,
},
Expand All @@ -167,6 +177,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
Bootstrap: clusterv1.Bootstrap{
DataSecretName: ptr.To[string]("bootstrap-data"),
},
// This version should be older or equal to the version used for the control plane, or workers won't be upgraded
Version: ptr.To[string]("1.30.0"),
},
},
},
Expand Down Expand Up @@ -879,6 +891,65 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
g.Expect(err).To(Succeed())
})

t.Run("launch template and ASG exist, but control plane k8s version is older than machinepool k8s version, should not update ASG", func(t *testing.T) {
g := NewWithT(t)
setup(t, g)
reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`)
reconSvc = nil // not used
defer teardown(t, g)

// Latest ID and version already stored, no need to retrieve it
ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting
ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1")
// Set the MachinePool k8s version to a version that is newer than the control plane k8s version
ms.MachinePool.Spec.Template.Spec.Version = ptr.To[string]("99.99.99")

ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(
&expinfrav1.AWSLaunchTemplate{
Name: "test",
AMI: infrav1.AMIReference{
ID: ptr.To[string]("ami-existing"),
},
},
// No change to user data
userdata.ComputeHash([]byte("shell-script")),
// But the name of the secret changes from `previous-secret-name` to `bootstrap-data`
&apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"},
nil,
nil)
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any(), gomock.Any()).Return(ptr.To[string]("ami-existing"), nil)
// Mock changes to the launch template, ASG would be updated, but k8s version skew will prevent it
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(true, nil)

// Control plane kubernetes version skew is checked before, so it won't be called
asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Times(0)
// Won't be called due to version skew
ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
// Won't be called due to version skew
asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()).Times(0)

asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) {
g.Expect(scope.Name()).To(Equal("test"))

// Add differences to `AWSMachinePool.spec`, ASG would be updated, but k8s version skew will prevent it
return &expinfrav1.AutoScalingGroup{
Name: scope.Name(),
Subnets: []string{
"subnet-1",
},
MinSize: awsMachinePool.Spec.MinSize - 1,
MaxSize: awsMachinePool.Spec.MaxSize + 1,
MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(),
}, nil
})
// No upgrade
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0)

_, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs)
// We expect an error because the workers use a k8s version newer than the control plane version
g.Expect(err).To(HaveOccurred())
})

t.Run("launch template and ASG created from zero, then bootstrap config reference changes", func(t *testing.T) {
g := NewWithT(t)
setup(t, g)
Expand Down Expand Up @@ -1396,13 +1467,42 @@ func setupCluster(clusterName string) (*scope.ClusterScope, error) {
ObjectMeta: metav1.ObjectMeta{Name: "test"},
Spec: infrav1.AWSClusterSpec{},
}
client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build()
controlPlane := &unstructured.Unstructured{}
controlPlane.Object = map[string]interface{}{
"metadata": map[string]interface{}{
"name": "test",
"namespace": "default",
},
"spec": map[string]interface{}{},
"status": map[string]interface{}{
"version": "1.30.0",
},
}
controlPlane.SetGroupVersionKind(schema.GroupVersionKind{
Group: "controlplane.cluster.x-k8s.io",
Kind: "KubeadmControlPlane",
Version: "v1beta1",
})
fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build()
err := fakeClient.Create(context.Background(), controlPlane)
if err != nil {
return nil, err
}

return scope.NewClusterScope(scope.ClusterScopeParams{
Cluster: &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{Name: clusterName},
Spec: clusterv1.ClusterSpec{
ControlPlaneRef: &corev1.ObjectReference{
Kind: "KubeadmControlPlane",
Namespace: "default",
Name: "test",
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
},
},
},
AWSCluster: awsCluster,
Client: client,
Client: fakeClient,
})
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ require (
github.com/aws/smithy-go v1.22.4
github.com/awslabs/goformation/v4 v4.19.5
github.com/blang/semver v3.5.1+incompatible
github.com/blang/semver/v4 v4.0.0
github.com/coreos/ignition v0.35.0
github.com/coreos/ignition/v2 v2.16.2
github.com/go-logr/logr v1.4.2
Expand Down Expand Up @@ -103,7 +104,6 @@ require (
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.23.4 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/briandowns/spinner v1.11.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
Expand Down