Skip to content

Commit 8a507ce

Browse files
committed
Check control plane version skew before creating new launch template version
1 parent a65a2f6 commit 8a507ce

File tree

2 files changed

+143
-2
lines changed

2 files changed

+143
-2
lines changed

exp/controllers/awsmachinepool_controller.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,14 @@ import (
2222
"fmt"
2323
"time"
2424

25+
"github.com/blang/semver/v4"
2526
"github.com/google/go-cmp/cmp"
2627
"github.com/google/go-cmp/cmp/cmpopts"
2728
"github.com/pkg/errors"
2829
corev1 "k8s.io/api/core/v1"
2930
apierrors "k8s.io/apimachinery/pkg/api/errors"
3031
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3133
"k8s.io/apimachinery/pkg/runtime/schema"
3234
"k8s.io/client-go/tools/record"
3335
"k8s.io/klog/v2"
@@ -305,6 +307,16 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
305307
// But we want to update the LaunchTemplate because an error in the LaunchTemplate may be blocking the ASG creation.
306308
return true, nil
307309
}
310+
311+
canProceed, err := r.isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope, machinePoolScope)
312+
if err != nil {
313+
return true, err
314+
}
315+
if !canProceed {
316+
machinePoolScope.Info("blocking the Launch Template update due to control plane k8s version skew")
317+
return false, nil
318+
}
319+
308320
return asgsvc.CanStartASGInstanceRefresh(machinePoolScope)
309321
}
310322
runPostLaunchTemplateUpdateOperation := func() error {
@@ -442,6 +454,35 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP
442454
return ctrl.Result{}, nil
443455
}
444456

457+
// isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew checks if the control plane is being upgraded, in which case we shouldn't update the launch template.
458+
func (r *AWSMachinePoolReconciler) isMachinePoolAllowedToUpgradeDueToControlPlaneVersionSkew(clusterScope cloud.ClusterScoper, machinePoolScope *scope.MachinePoolScope) (bool, error) {
459+
if machinePoolScope.Cluster.Spec.ControlPlaneRef == nil {
460+
return true, nil
461+
}
462+
463+
controlPlane, err := clusterScope.UnstructuredControlPlane()
464+
if err != nil {
465+
return true, errors.Wrap(err, "failed to get ControlPlane")
466+
}
467+
468+
cpVersion, found, err := unstructured.NestedString(controlPlane.Object, "status", "version")
469+
if !found || err != nil {
470+
return true, errors.Wrapf(err, "failed to get version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name)
471+
}
472+
473+
controlPlaneCurrentK8sVersion, err := semver.ParseTolerant(cpVersion)
474+
if err != nil {
475+
return true, errors.Wrapf(err, "failed to parse version of ControlPlane %s", machinePoolScope.Cluster.Spec.ControlPlaneRef.Name)
476+
}
477+
478+
machinePoolDesiredK8sVersion, err := semver.ParseTolerant(*machinePoolScope.MachinePool.Spec.Template.Spec.Version)
479+
if err != nil {
480+
return true, errors.Wrap(err, "failed to parse version of MachinePool")
481+
}
482+
483+
return controlPlaneCurrentK8sVersion.GE(machinePoolDesiredK8sVersion), nil
484+
}
485+
445486
func (r *AWSMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error {
446487
clusterScope.Info("Handling deleted AWSMachinePool")
447488

exp/controllers/awsmachinepool_controller_test.go

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ import (
3333
"github.com/pkg/errors"
3434
corev1 "k8s.io/api/core/v1"
3535
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
36+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3637
"k8s.io/apimachinery/pkg/runtime"
38+
"k8s.io/apimachinery/pkg/runtime/schema"
3739
apimachinerytypes "k8s.io/apimachinery/pkg/types"
3840
"k8s.io/client-go/tools/record"
3941
utilfeature "k8s.io/component-base/featuregate/testing"
@@ -145,6 +147,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
145147
scope.MachinePoolScopeParams{
146148
Client: testEnv.Client,
147149
Cluster: &clusterv1.Cluster{
150+
Spec: clusterv1.ClusterSpec{
151+
ControlPlaneRef: &corev1.ObjectReference{
152+
Name: "test",
153+
Kind: "KubeadmControlPlane",
154+
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
155+
Namespace: "default",
156+
},
157+
},
148158
Status: clusterv1.ClusterStatus{
149159
InfrastructureReady: true,
150160
},
@@ -167,6 +177,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
167177
Bootstrap: clusterv1.Bootstrap{
168178
DataSecretName: ptr.To[string]("bootstrap-data"),
169179
},
180+
// This version should be older or equal to the version used for the control plane, or workers won't be upgraded
181+
Version: ptr.To[string]("1.30.0"),
170182
},
171183
},
172184
},
@@ -879,6 +891,65 @@ func TestAWSMachinePoolReconciler(t *testing.T) {
879891
g.Expect(err).To(Succeed())
880892
})
881893

894+
t.Run("launch template and ASG exist, but control plane k8s version is older than machinepool k8s version, should not update ASG", func(t *testing.T) {
895+
g := NewWithT(t)
896+
setup(t, g)
897+
reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`)
898+
reconSvc = nil // not used
899+
defer teardown(t, g)
900+
901+
// Latest ID and version already stored, no need to retrieve it
902+
ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting
903+
ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1")
904+
// Set the MachinePool k8s version to a version that is newer than the control plane k8s version
905+
ms.MachinePool.Spec.Template.Spec.Version = ptr.To[string]("99.99.99")
906+
907+
ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(
908+
&expinfrav1.AWSLaunchTemplate{
909+
Name: "test",
910+
AMI: infrav1.AMIReference{
911+
ID: ptr.To[string]("ami-existing"),
912+
},
913+
},
914+
// No change to user data
915+
userdata.ComputeHash([]byte("shell-script")),
916+
// But the name of the secret changes from `previous-secret-name` to `bootstrap-data`
917+
&apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"},
918+
nil,
919+
nil)
920+
ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil)
921+
// Mock changes to the launch template, ASG would be updated, but k8s version skew will prevent it
922+
ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(true, nil)
923+
924+
// Control plane kubernetes version skew is checked before, so it won't be called
925+
asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Times(0)
926+
// Won't be called due to version skew
927+
ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Times(0)
928+
// Won't be called due to version skew
929+
asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()).Times(0)
930+
931+
asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) {
932+
g.Expect(scope.Name()).To(Equal("test"))
933+
934+
// Add differences to `AWSMachinePool.spec`, ASG would be updated, but k8s version skew will prevent it
935+
return &expinfrav1.AutoScalingGroup{
936+
Name: scope.Name(),
937+
Subnets: []string{
938+
"subnet-1",
939+
},
940+
MinSize: awsMachinePool.Spec.MinSize - 1,
941+
MaxSize: awsMachinePool.Spec.MaxSize + 1,
942+
MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(),
943+
}, nil
944+
})
945+
// No upgrade
946+
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0)
947+
948+
_, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs)
949+
// We expect an error because the workers use a k8s version newer than the control plane version
950+
g.Expect(err).To(HaveOccurred())
951+
})
952+
882953
t.Run("launch template and ASG created from zero, then bootstrap config reference changes", func(t *testing.T) {
883954
g := NewWithT(t)
884955
setup(t, g)
@@ -1389,13 +1460,42 @@ func setupCluster(clusterName string) (*scope.ClusterScope, error) {
13891460
ObjectMeta: metav1.ObjectMeta{Name: "test"},
13901461
Spec: infrav1.AWSClusterSpec{},
13911462
}
1392-
client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build()
1463+
controlPlane := &unstructured.Unstructured{}
1464+
controlPlane.Object = map[string]interface{}{
1465+
"metadata": map[string]interface{}{
1466+
"name": "test",
1467+
"namespace": "default",
1468+
},
1469+
"spec": map[string]interface{}{},
1470+
"status": map[string]interface{}{
1471+
"version": "1.30.0",
1472+
},
1473+
}
1474+
controlPlane.SetGroupVersionKind(schema.GroupVersionKind{
1475+
Group: "controlplane.cluster.x-k8s.io",
1476+
Kind: "KubeadmControlPlane",
1477+
Version: "v1beta1",
1478+
})
1479+
fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(awsCluster).Build()
1480+
err := fakeClient.Create(context.Background(), controlPlane)
1481+
if err != nil {
1482+
return nil, err
1483+
}
1484+
13931485
return scope.NewClusterScope(scope.ClusterScopeParams{
13941486
Cluster: &clusterv1.Cluster{
13951487
ObjectMeta: metav1.ObjectMeta{Name: clusterName},
1488+
Spec: clusterv1.ClusterSpec{
1489+
ControlPlaneRef: &corev1.ObjectReference{
1490+
Kind: "KubeadmControlPlane",
1491+
Namespace: "default",
1492+
Name: "test",
1493+
APIVersion: "controlplane.cluster.x-k8s.io/v1beta1",
1494+
},
1495+
},
13961496
},
13971497
AWSCluster: awsCluster,
1398-
Client: client,
1498+
Client: fakeClient,
13991499
})
14001500
}
14011501

0 commit comments

Comments
 (0)