Skip to content

Commit e75c09f

Browse files
Add support for CP rollout strategy configuration (#135)
* test: Adds Upgrade test scenario with MaxSurge=0 Currently, we only test the rolling upgrade with the default MaxSurge=1 (for a 3-node control plane cluster, a 4th node is created and joined before removing one of the nodes). This adds a test for the rolling upgrade with the MaxSurge=0 scenario (for a 3-node control plane cluster, it first removes a node, then creates and joins a new one). The test ensures that the number of Machines present in CAPI is at most 3 for this scenario. * Add RollingUpdate MaxSurge The CK8sControlPlane gets an extra configuration option, spec.rolloutStrategy.rollingUpdate.maxSurge, which can be set to 0. spec: rolloutStrategy: rollingUpdate: maxSurge: 1 | 0 By default maxSurge is set to 1, meaning that when a cluster rollingUpdate is performed, the ControlPlanes are rolled out in the N, N+1, N fashion (where N is the total number of Control Planes). This workflow always requires one extra spare Machine to be available during the rolling update. If maxSurge is set to 0, then when a cluster rollingUpdate is performed, the ControlPlanes are rolled out in the N, N-1, N fashion (where N is the total number of Control Planes). This workflow does not require an extra spare Machine to be available during the rolling update. --------- Co-authored-by: Claudiu Belu <[email protected]>
1 parent 394a84f commit e75c09f

12 files changed

+397
-3
lines changed

controlplane/api/v1beta2/ck8scontrolplane_types.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121

2222
corev1 "k8s.io/api/core/v1"
2323
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24+
"k8s.io/apimachinery/pkg/util/intstr"
2425
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2526

2627
bootstrapv1 "github.com/canonical/cluster-api-k8s/bootstrap/api/v1beta2"
@@ -81,6 +82,12 @@ type CK8sControlPlaneSpec struct {
8182
// The RemediationStrategy that controls how control plane machine remediation happens.
8283
// +optional
8384
RemediationStrategy *RemediationStrategy `json:"remediationStrategy,omitempty"`
85+
86+
// rolloutStrategy is the RolloutStrategy to use to replace control plane machines with
87+
// new ones.
88+
// +optional
89+
// +kubebuilder:default={rollingUpdate: {maxSurge: 1}}
90+
RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"`
8491
}
8592

8693
// MachineTemplate contains information about how machines should be shaped
@@ -109,6 +116,26 @@ type CK8sControlPlaneMachineTemplate struct {
109116
NodeDeletionTimeout *metav1.Duration `json:"nodeDeletionTimeout,omitempty"`
110117
}
111118

119+
// RolloutStrategy describes how existing control plane machines are replaced
120+
// with new ones during a rollout.
121+
type RolloutStrategy struct {
122+
// rollingUpdate holds the rolling update configuration parameters.
123+
// +optional
124+
RollingUpdate *RollingUpdate `json:"rollingUpdate,omitempty"`
125+
}
126+
127+
// RollingUpdate is used to control the desired behavior of a rolling update.
128+
type RollingUpdate struct {
129+
// maxSurge is the maximum number of control planes that can be scheduled above or under the
130+
// desired number of control planes.
131+
// Value can be an absolute number 1 or 0.
132+
// Defaults to 1.
133+
// Example: when this is set to 1, the control plane can be scaled
134+
// up immediately when the rolling update starts.
// Example: when this is set to 0, a control plane machine is removed
// first, and only then is its replacement created and joined.
135+
// +optional
136+
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`
137+
}
138+
112139
// RemediationStrategy allows to define how control plane machine remediation happens.
113140
type RemediationStrategy struct {
114141
// MaxRetry is the Max number of retries while attempting to remediate an unhealthy machine.

controlplane/api/v1beta2/ck8scontrolplane_webhook.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
apierrors "k8s.io/apimachinery/pkg/api/errors"
2424
"k8s.io/apimachinery/pkg/runtime"
25+
"k8s.io/apimachinery/pkg/util/intstr"
2526
ctrl "sigs.k8s.io/controller-runtime"
2627
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
2728
)
@@ -76,4 +77,24 @@ func defaultCK8sControlPlaneSpec(s *CK8sControlPlaneSpec, namespace string) {
7677
if s.MachineTemplate.InfrastructureRef.Namespace == "" {
7778
s.MachineTemplate.InfrastructureRef.Namespace = namespace
7879
}
80+
81+
s.RolloutStrategy = defaultRolloutStrategy(s.RolloutStrategy)
82+
}
83+
84+
func defaultRolloutStrategy(rolloutStrategy *RolloutStrategy) *RolloutStrategy {
85+
ios1 := intstr.FromInt(1)
86+
87+
if rolloutStrategy == nil {
88+
rolloutStrategy = &RolloutStrategy{}
89+
}
90+
91+
// Enforce RollingUpdate strategy and default MaxSurge if not set.
92+
if rolloutStrategy != nil {
93+
if rolloutStrategy.RollingUpdate == nil {
94+
rolloutStrategy.RollingUpdate = &RollingUpdate{}
95+
rolloutStrategy.RollingUpdate.MaxSurge = intstr.ValueOrDefault(rolloutStrategy.RollingUpdate.MaxSurge, ios1)
96+
}
97+
}
98+
99+
return rolloutStrategy
79100
}

controlplane/api/v1beta2/ck8scontrolplanetemplate_types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ type CK8sControlPlaneTemplateResourceSpec struct {
5454
// The RemediationStrategy that controls how control plane machine remediation happens.
5555
// +optional
5656
RemediationStrategy *RemediationStrategy `json:"remediationStrategy,omitempty"`
57+
58+
// rolloutStrategy is the RolloutStrategy to use to replace control plane machines with
59+
// new ones.
60+
// +optional
61+
// +kubebuilder:default={rollingUpdate: {maxSurge: 1}}
62+
RolloutStrategy *RolloutStrategy `json:"rolloutStrategy,omitempty"`
5763
}
5864

5965
// +kubebuilder:object:root=true

controlplane/api/v1beta2/zz_generated.deepcopy.go

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanes.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,31 @@ spec:
227227
CK8sControlPlane
228228
format: date-time
229229
type: string
230+
rolloutStrategy:
231+
default:
232+
rollingUpdate:
233+
maxSurge: 1
234+
description: |-
235+
rolloutStrategy is the RolloutStrategy to use to replace control plane machines with
236+
new ones.
237+
properties:
238+
rollingUpdate:
239+
description: rollingUpdate is the rolling update config params.
240+
properties:
241+
maxSurge:
242+
anyOf:
243+
- type: integer
244+
- type: string
245+
description: |-
246+
maxSurge is the maximum number of control planes that can be scheduled above or under the
247+
desired number of control planes.
248+
Value can be an absolute number 1 or 0.
249+
Defaults to 1.
250+
Example: when this is set to 1, the control plane can be scaled
251+
up immediately when the rolling update starts.
252+
x-kubernetes-int-or-string: true
253+
type: object
254+
type: object
230255
spec:
231256
description: |-
232257
CK8sConfigSpec is a CK8sConfigSpec

controlplane/config/crd/bases/controlplane.cluster.x-k8s.io_ck8scontrolplanetemplates.yaml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,32 @@ spec:
201201
CK8sControlPlane
202202
format: date-time
203203
type: string
204+
rolloutStrategy:
205+
default:
206+
rollingUpdate:
207+
maxSurge: 1
208+
description: |-
209+
rolloutStrategy is the RolloutStrategy to use to replace control plane machines with
210+
new ones.
211+
properties:
212+
rollingUpdate:
213+
description: rollingUpdate is the rolling update config
214+
params.
215+
properties:
216+
maxSurge:
217+
anyOf:
218+
- type: integer
219+
- type: string
220+
description: |-
221+
maxSurge is the maximum number of control planes that can be scheduled above or under the
222+
desired number of control planes.
223+
Value can be an absolute number 1 or 0.
224+
Defaults to 1.
225+
Example: when this is set to 1, the control plane can be scaled
226+
up immediately when the rolling update starts.
227+
x-kubernetes-int-or-string: true
228+
type: object
229+
type: object
204230
spec:
205231
description: |-
206232
CK8sConfigSpec is a CK8sConfigSpec

controlplane/controllers/ck8scontrolplane_controller.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,15 @@ func (r *CK8sControlPlaneReconciler) upgradeControlPlane(
738738
return reconcile.Result{}, fmt.Errorf("failed to upgrade kubelet config map")
739739
}
740740
**/
741+
logger := r.Log.WithValues("namespace", kcp.Namespace, "CK8sControlPlane", kcp.Name, "cluster", cluster.Name)
742+
743+
if kcp.Spec.RolloutStrategy == nil {
744+
logger.Info("RolloutStrategy is empty, unable to continue")
745+
return ctrl.Result{}, nil
746+
}
741747

742-
if controlPlane.Machines.Len() <= int(*kcp.Spec.Replicas) {
748+
maxNodes := *kcp.Spec.Replicas + int32(kcp.Spec.RolloutStrategy.RollingUpdate.MaxSurge.IntValue())
749+
if int32(controlPlane.Machines.Len()) < maxNodes {
743750
// scaleUp ensures that we don't continue scaling up while waiting for Machines to have NodeRefs
744751
return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane)
745752
}

test/e2e/cluster_upgrade.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ import (
3434
"sigs.k8s.io/cluster-api/util"
3535
)
3636

37+
const (
38+
flavorUpgrades = "upgrades"
39+
flavorUpgradesMaxSurge0 = "upgrades-max-surge-0"
40+
)
41+
3742
// ClusterUpgradeSpecInput is the input for ClusterUpgradeConformanceSpec.
3843
type ClusterUpgradeSpecInput struct {
3944
E2EConfig *clusterctl.E2EConfig
@@ -80,6 +85,9 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
8085
result *ApplyClusterTemplateAndWaitResult
8186
clusterName string
8287
clusterctlLogFolder string
88+
89+
flavor string
90+
maxControlPlaneMachineCount int64
8391
)
8492

8593
BeforeEach(func() {
@@ -107,6 +115,17 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
107115
workerMachineCount = *input.WorkerMachineCount
108116
}
109117

118+
// Default flavor with the default MaxSurge=1 (we expect to see at most 1 extra Machine).
119+
flavor = flavorUpgrades
120+
maxControlPlaneMachineCount = controlPlaneMachineCount + 1
121+
if input.Flavor != nil {
122+
flavor = *input.Flavor
123+
if flavor == flavorUpgradesMaxSurge0 {
124+
// MaxSurge=0 (we should see no additional Machine).
125+
maxControlPlaneMachineCount = controlPlaneMachineCount
126+
}
127+
}
128+
110129
// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
111130
namespace, cancelWatches = setupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder)
112131

@@ -139,7 +158,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
139158
ClusterctlConfigPath: input.ClusterctlConfigPath,
140159
KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(),
141160
InfrastructureProvider: *input.InfrastructureProvider,
142-
Flavor: ptr.Deref(input.Flavor, "upgrades"),
161+
Flavor: flavor,
143162
Namespace: namespace.Name,
144163
ClusterName: clusterName,
145164
KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion),
@@ -157,6 +176,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp
157176
ClusterProxy: input.BootstrapClusterProxy,
158177
Cluster: result.Cluster,
159178
ControlPlane: result.ControlPlane,
179+
MaxControlPlaneMachineCount: maxControlPlaneMachineCount,
160180
KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
161181
UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)),
162182
WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),

test/e2e/cluster_upgrade_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,21 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() {
4141
})
4242
})
4343
})
44+
45+
var _ = Describe("Workload cluster upgrade with MaxSurge=0 [CK8s-Upgrade]", func() {
46+
Context("Upgrading a cluster with HA control plane", func() {
47+
ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput {
48+
return ClusterUpgradeSpecInput{
49+
E2EConfig: e2eConfig,
50+
ClusterctlConfigPath: clusterctlConfigPath,
51+
BootstrapClusterProxy: bootstrapClusterProxy,
52+
ArtifactFolder: artifactFolder,
53+
SkipCleanup: skipCleanup,
54+
InfrastructureProvider: ptr.To(clusterctl.DefaultInfrastructureProvider),
55+
ControlPlaneMachineCount: ptr.To[int64](3),
56+
WorkerMachineCount: ptr.To[int64](1),
57+
Flavor: ptr.To[string](flavorUpgradesMaxSurge0),
58+
}
59+
})
60+
})
61+
})

test/e2e/config/ck8s-docker.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ providers:
5454
- sourcePath: "../data/infrastructure-docker/cluster-template-kcp-remediation.yaml"
5555
- sourcePath: "../data/infrastructure-docker/cluster-template-md-remediation.yaml"
5656
- sourcePath: "../data/infrastructure-docker/cluster-template-upgrades.yaml"
57+
- sourcePath: "../data/infrastructure-docker/cluster-template-upgrades-max-surge-0.yaml"
5758
- sourcePath: "../data/infrastructure-docker/cluster-template.yaml"
5859
- name: ck8s
5960
type: BootstrapProvider

0 commit comments

Comments
 (0)