Skip to content

Commit 91d645f

Browse files
authored
Merge pull request #8432 from ykakarap/pr-concurrent-worker-upgrades
✨ add support for concurrent MD upgrades in classy clusters
2 parents 4df2e09 + 9cbf19e commit 91d645f

File tree

10 files changed

+321
-69
lines changed

10 files changed

+321
-69
lines changed

api/v1beta1/common_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ const (
5353
// will not be completed until the annotation is removed and all MachineDeployments are upgraded.
5454
ClusterTopologyDeferUpgradeAnnotation = "topology.cluster.x-k8s.io/defer-upgrade"
5555

56+
// ClusterTopologyUpgradeConcurrencyAnnotation can be set as a top-level annotation on the Cluster object of
57+
// a classy Cluster to define the maximum concurrency while upgrading MachineDeployments.
58+
ClusterTopologyUpgradeConcurrencyAnnotation = "topology.cluster.x-k8s.io/upgrade-concurrency"
59+
5660
// ClusterTopologyUnsafeUpdateClassNameAnnotation can be used to disable the webhook check on
5761
// update that disallows a pre-existing Cluster to be populated with Topology information and Class.
5862
ClusterTopologyUnsafeUpdateClassNameAnnotation = "unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check"

docs/book/src/reference/labels_and_annotations.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
| topology.cluster.x-k8s.io/defer-upgrade | It can be used to defer the Kubernetes upgrade of a single MachineDeployment topology. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology is deferred. It doesn't affect other MachineDeployment topologies. |
4141
| topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. |
4242
| topology.cluster.x-k8s.io/hold-upgrade-sequence | It can be used to hold the entire MachineDeployment upgrade sequence. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology and all subsequent ones is deferred. |
43+
| topology.cluster.x-k8s.io/upgrade-concurrency | It can be used to configure the maximum concurrency while upgrading MachineDeployments of a classy Cluster. It is set as a top-level annotation on the Cluster object. The value should be >= 1. If unspecified, the upgrade concurrency will default to 1. |
4344
| machine.cluster.x-k8s.io/certificates-expiry | It captures the expiry date of the machine certificates in RFC3339 format. It is used to trigger rollout of control plane machines before certificates expire. It can be set on BootstrapConfig and Machine objects. The value set on Machine object takes precedence. The annotation is only used by control plane machines. |
4445
| machine.cluster.x-k8s.io/exclude-node-draining | It explicitly skips node draining if set. |
4546
| machine.cluster.x-k8s.io/exclude-wait-for-node-volume-detach | It explicitly skips the waiting for node volume detaching if set. |

internal/controllers/topology/cluster/desired_state.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -842,14 +842,6 @@ func computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology c
842842
return currentVersion, nil
843843
}
844844

845-
// At this point the control plane is stable (not scaling, not upgrading, not being upgraded).
846-
// Checking to see if the machine deployments are also stable.
847-
// If any of the MachineDeployments is rolling out, do not upgrade the machine deployment yet.
848-
if s.Current.MachineDeployments.IsAnyRollingOut() {
849-
s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name)
850-
return currentVersion, nil
851-
}
852-
853845
// Control plane and machine deployments are stable.
854846
// Ready to pick up the topology version.
855847
s.UpgradeTracker.MachineDeployments.MarkRollingOut(currentMDState.Object.Name)

internal/controllers/topology/cluster/desired_state_test.go

Lines changed: 93 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,23 +1636,25 @@ func TestComputeMachineDeployment(t *testing.T) {
16361636
}).
16371637
Build()
16381638

1639-
machineDeploymentStable := builder.MachineDeployment("test-namespace", "md-1").
1639+
machineDeploymentStable := builder.MachineDeployment("test-namespace", "md-stable").
16401640
WithGeneration(1).
16411641
WithReplicas(2).
16421642
WithStatus(clusterv1.MachineDeploymentStatus{
16431643
ObservedGeneration: 2,
16441644
Replicas: 2,
1645+
ReadyReplicas: 2,
16451646
UpdatedReplicas: 2,
16461647
AvailableReplicas: 2,
16471648
}).
16481649
Build()
16491650

1650-
machineDeploymentRollingOut := builder.MachineDeployment("test-namespace", "md-1").
1651+
machineDeploymentRollingOut := builder.MachineDeployment("test-namespace", "md-rolling").
16511652
WithGeneration(1).
16521653
WithReplicas(2).
16531654
WithStatus(clusterv1.MachineDeploymentStatus{
16541655
ObservedGeneration: 2,
16551656
Replicas: 1,
1657+
ReadyReplicas: 1,
16561658
UpdatedReplicas: 1,
16571659
AvailableReplicas: 1,
16581660
}).
@@ -1670,28 +1672,46 @@ func TestComputeMachineDeployment(t *testing.T) {
16701672
name string
16711673
machineDeploymentsState scope.MachineDeploymentsStateMap
16721674
currentMDVersion *string
1675+
upgradeConcurrency string
16731676
topologyVersion string
16741677
expectedVersion string
16751678
}{
16761679
{
16771680
name: "use cluster.spec.topology.version if creating a new machine deployment",
16781681
machineDeploymentsState: nil,
1682+
upgradeConcurrency: "1",
16791683
currentMDVersion: nil,
16801684
topologyVersion: "v1.2.3",
16811685
expectedVersion: "v1.2.3",
16821686
},
16831687
{
1684-
name: "use machine deployment's spec.template.spec.version if one of the machine deployments is rolling out",
1688+
name: "use machine deployment's spec.template.spec.version if one of the machine deployments is rolling out, concurrency limit reached",
16851689
machineDeploymentsState: machineDeploymentsStateRollingOut,
1690+
upgradeConcurrency: "1",
16861691
currentMDVersion: pointer.String("v1.2.2"),
16871692
topologyVersion: "v1.2.3",
16881693
expectedVersion: "v1.2.2",
16891694
},
1695+
{
1696+
name: "use cluster.spec.topology.version if one of the machine deployments is rolling out, concurrency limit not reached",
1697+
machineDeploymentsState: machineDeploymentsStateRollingOut,
1698+
upgradeConcurrency: "2",
1699+
currentMDVersion: pointer.String("v1.2.2"),
1700+
topologyVersion: "v1.2.3",
1701+
expectedVersion: "v1.2.3",
1702+
},
16901703
}
16911704
for _, tt := range tests {
16921705
t.Run(tt.name, func(t *testing.T) {
16931706
g := NewWithT(t)
1694-
s := scope.New(cluster)
1707+
1708+
testCluster := cluster.DeepCopy()
1709+
if testCluster.Annotations == nil {
1710+
testCluster.Annotations = map[string]string{}
1711+
}
1712+
testCluster.Annotations[clusterv1.ClusterTopologyUpgradeConcurrencyAnnotation] = tt.upgradeConcurrency
1713+
1714+
s := scope.New(testCluster)
16951715
s.Blueprint = blueprint
16961716
s.Blueprint.Topology.Version = tt.topologyVersion
16971717
s.Blueprint.Topology.ControlPlane = clusterv1.ControlPlaneTopology{
@@ -1710,6 +1730,7 @@ func TestComputeMachineDeployment(t *testing.T) {
17101730
WithStatus(clusterv1.MachineDeploymentStatus{
17111731
ObservedGeneration: 2,
17121732
Replicas: 2,
1733+
ReadyReplicas: 2,
17131734
UpdatedReplicas: 2,
17141735
AvailableReplicas: 2,
17151736
}).
@@ -1723,6 +1744,7 @@ func TestComputeMachineDeployment(t *testing.T) {
17231744
s.Current.ControlPlane = &scope.ControlPlaneState{
17241745
Object: controlPlaneStable123,
17251746
}
1747+
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)
17261748
desiredControlPlaneState := &scope.ControlPlaneState{
17271749
Object: controlPlaneStable123,
17281750
}
@@ -1829,45 +1851,55 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
18291851
//
18301852
// A machine deployment is considered upgrading if any of the above conditions
18311853
// is false.
1832-
machineDeploymentStable := builder.MachineDeployment("test-namespace", "md-1").
1833-
WithGeneration(1).
1834-
WithReplicas(2).
1835-
WithStatus(clusterv1.MachineDeploymentStatus{
1836-
ObservedGeneration: 2,
1837-
Replicas: 2,
1838-
UpdatedReplicas: 2,
1839-
AvailableReplicas: 2,
1840-
ReadyReplicas: 2,
1841-
UnavailableReplicas: 0,
1842-
}).
1843-
Build()
1844-
machineDeploymentRollingOut := builder.MachineDeployment("test-namespace", "md-2").
1845-
WithGeneration(1).
1846-
WithReplicas(2).
1847-
WithStatus(clusterv1.MachineDeploymentStatus{
1848-
ObservedGeneration: 2,
1849-
Replicas: 1,
1850-
UpdatedReplicas: 1,
1851-
AvailableReplicas: 1,
1852-
ReadyReplicas: 1,
1853-
UnavailableReplicas: 1,
1854-
}).
1855-
Build()
1854+
stableMachineDeployment := func(ns, name string) *clusterv1.MachineDeployment {
1855+
return builder.MachineDeployment(ns, name).
1856+
WithGeneration(1).
1857+
WithReplicas(2).
1858+
WithStatus(clusterv1.MachineDeploymentStatus{
1859+
ObservedGeneration: 2,
1860+
Replicas: 2,
1861+
UpdatedReplicas: 2,
1862+
AvailableReplicas: 2,
1863+
ReadyReplicas: 2,
1864+
UnavailableReplicas: 0,
1865+
}).
1866+
Build()
1867+
}
18561868

1857-
machineDeploymentsStateStable := scope.MachineDeploymentsStateMap{
1858-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
1859-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentStable},
1869+
rollingMachineDeployment := func(ns, name string) *clusterv1.MachineDeployment {
1870+
return builder.MachineDeployment(ns, name).
1871+
WithGeneration(1).
1872+
WithReplicas(2).
1873+
WithStatus(clusterv1.MachineDeploymentStatus{
1874+
ObservedGeneration: 2,
1875+
Replicas: 1,
1876+
UpdatedReplicas: 1,
1877+
AvailableReplicas: 1,
1878+
ReadyReplicas: 1,
1879+
UnavailableReplicas: 1,
1880+
}).
1881+
Build()
1882+
}
1883+
1884+
twoMachineDeploymentsStateStable := scope.MachineDeploymentsStateMap{
1885+
"md1": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md1")},
1886+
"md2": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md2")},
1887+
}
1888+
oneStableOneRollingMachineDeploymentState := scope.MachineDeploymentsStateMap{
1889+
"md1": &scope.MachineDeploymentState{Object: stableMachineDeployment("test1", "md1")},
1890+
"md2": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md2")},
18601891
}
1861-
machineDeploymentsStateRollingOut := scope.MachineDeploymentsStateMap{
1862-
"md1": &scope.MachineDeploymentState{Object: machineDeploymentStable},
1863-
"md2": &scope.MachineDeploymentState{Object: machineDeploymentRollingOut},
1892+
twoRollingMachineDeploymentState := scope.MachineDeploymentsStateMap{
1893+
"md1": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md1")},
1894+
"md2": &scope.MachineDeploymentState{Object: rollingMachineDeployment("test1", "md2")},
18641895
}
18651896

18661897
tests := []struct {
18671898
name string
18681899
machineDeploymentTopology clusterv1.MachineDeploymentTopology
18691900
currentMachineDeploymentState *scope.MachineDeploymentState
18701901
machineDeploymentsStateMap scope.MachineDeploymentsStateMap
1902+
upgradeConcurrency int
18711903
currentControlPlane *unstructured.Unstructured
18721904
desiredControlPlane *unstructured.Unstructured
18731905
topologyVersion string
@@ -1890,16 +1922,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
18901922
},
18911923
},
18921924
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1893-
machineDeploymentsStateMap: machineDeploymentsStateStable,
1894-
currentControlPlane: controlPlaneStable123,
1895-
desiredControlPlane: controlPlaneDesired,
1896-
topologyVersion: "v1.2.3",
1897-
expectedVersion: "v1.2.2",
1898-
},
1899-
{
1900-
name: "should return machine deployment's spec.template.spec.version if any one of the machine deployments is rolling out",
1901-
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1902-
machineDeploymentsStateMap: machineDeploymentsStateRollingOut,
1925+
machineDeploymentsStateMap: twoMachineDeploymentsStateStable,
19031926
currentControlPlane: controlPlaneStable123,
19041927
desiredControlPlane: controlPlaneDesired,
19051928
topologyVersion: "v1.2.3",
@@ -1909,7 +1932,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19091932
// Control plane is considered upgrading if the control plane's spec.version and status.version is not equal.
19101933
name: "should return machine deployment's spec.template.spec.version if control plane is upgrading",
19111934
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1912-
machineDeploymentsStateMap: machineDeploymentsStateStable,
1935+
machineDeploymentsStateMap: twoMachineDeploymentsStateStable,
19131936
currentControlPlane: controlPlaneUpgrading,
19141937
topologyVersion: "v1.2.3",
19151938
expectedVersion: "v1.2.2",
@@ -1918,7 +1941,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19181941
// Control plane is considered ready to upgrade if spec.version of current and desired control planes are not equal.
19191942
name: "should return machine deployment's spec.template.spec.version if control plane is ready to upgrade",
19201943
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1921-
machineDeploymentsStateMap: machineDeploymentsStateStable,
1944+
machineDeploymentsStateMap: twoMachineDeploymentsStateStable,
19221945
currentControlPlane: controlPlaneStable122,
19231946
desiredControlPlane: controlPlaneDesired,
19241947
topologyVersion: "v1.2.3",
@@ -1928,20 +1951,40 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19281951
// Control plane is considered scaling if its spec.replicas is not equal to any of status.replicas, status.readyReplicas or status.updatedReplicas.
19291952
name: "should return machine deployment's spec.template.spec.version if control plane is scaling",
19301953
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1931-
machineDeploymentsStateMap: machineDeploymentsStateStable,
1954+
machineDeploymentsStateMap: twoMachineDeploymentsStateStable,
19321955
currentControlPlane: controlPlaneScaling,
19331956
topologyVersion: "v1.2.3",
19341957
expectedVersion: "v1.2.2",
19351958
},
19361959
{
19371960
name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are rolling out",
19381961
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1939-
machineDeploymentsStateMap: machineDeploymentsStateStable,
1962+
machineDeploymentsStateMap: twoMachineDeploymentsStateStable,
19401963
currentControlPlane: controlPlaneStable123,
19411964
desiredControlPlane: controlPlaneDesired,
19421965
topologyVersion: "v1.2.3",
19431966
expectedVersion: "v1.2.3",
19441967
},
1968+
{
1969+
name: "should return cluster.spec.topology.version if control plane is stable, other machine deployments are rolling out, concurrency limit not reached",
1970+
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1971+
machineDeploymentsStateMap: oneStableOneRollingMachineDeploymentState,
1972+
upgradeConcurrency: 2,
1973+
currentControlPlane: controlPlaneStable123,
1974+
desiredControlPlane: controlPlaneDesired,
1975+
topologyVersion: "v1.2.3",
1976+
expectedVersion: "v1.2.3",
1977+
},
1978+
{
1979+
name: "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are rolling out, concurrency limit reached",
1980+
currentMachineDeploymentState: &scope.MachineDeploymentState{Object: builder.MachineDeployment("test1", "md-current").WithVersion("v1.2.2").Build()},
1981+
machineDeploymentsStateMap: twoRollingMachineDeploymentState,
1982+
upgradeConcurrency: 2,
1983+
currentControlPlane: controlPlaneStable123,
1984+
desiredControlPlane: controlPlaneDesired,
1985+
topologyVersion: "v1.2.3",
1986+
expectedVersion: "v1.2.2",
1987+
},
19451988
}
19461989

19471990
for _, tt := range tests {
@@ -1960,9 +2003,10 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
19602003
ControlPlane: &scope.ControlPlaneState{Object: tt.currentControlPlane},
19612004
MachineDeployments: tt.machineDeploymentsStateMap,
19622005
},
1963-
UpgradeTracker: scope.NewUpgradeTracker(),
2006+
UpgradeTracker: scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)),
19642007
}
19652008
desiredControlPlaneState := &scope.ControlPlaneState{Object: tt.desiredControlPlane}
2009+
s.UpgradeTracker.MachineDeployments.MarkRollingOut(s.Current.MachineDeployments.RollingOut()...)
19662010
version, err := computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, desiredControlPlaneState, tt.currentMachineDeploymentState)
19672011
g.Expect(err).NotTo(HaveOccurred())
19682012
g.Expect(version).To(Equal(tt.expectedVersion))

0 commit comments

Comments
 (0)