@@ -41,6 +41,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/e2e/internal/log"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
@@ -123,6 +124,9 @@ type scaleSpecInput struct {
 	// if the test suite should fail as soon as c6 fails or if it should fail after all cluster creations are done.
 	FailFast bool
 
+	// SkipUpgrade if set to true will skip upgrading the workload clusters.
+	SkipUpgrade bool
+
 	// SkipCleanup if set to true will skip deleting the workload clusters.
 	SkipCleanup bool
 
@@ -133,7 +137,7 @@ type scaleSpecInput struct {
 	SkipWaitForCreation bool
 }
 
-// scaleSpec implements a scale test.
+// scaleSpec implements a scale test for clusters with MachineDeployments.
 func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 	var (
 		specName = "scale"
@@ -253,7 +257,7 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 			Flavor:                   flavor,
 			Namespace:                scaleClusterNamespacePlaceholder,
 			ClusterName:              scaleClusterNamePlaceholder,
-			KubernetesVersion:        input.E2EConfig.GetVariable(KubernetesVersion),
+			KubernetesVersion:        input.E2EConfig.GetVariable(KubernetesVersionUpgradeFrom),
 			ControlPlaneMachineCount: controlPlaneMachineCount,
 			WorkerMachineCount:       workerMachineCount,
 		})
@@ -333,6 +337,47 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 		Fail("")
 	}
 
+	if !input.SkipUpgrade {
+		By("Upgrade the workload clusters concurrently")
+		// Get the upgrade function for upgrading the workload clusters.
+		upgrader := getClusterUpgradeAndWaitFn(framework.UpgradeClusterTopologyAndWaitForUpgradeInput{
+			ClusterProxy:                input.BootstrapClusterProxy,
+			KubernetesUpgradeVersion:    input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
+			EtcdImageTag:                input.E2EConfig.GetVariable(EtcdVersionUpgradeTo),
+			DNSImageTag:                 input.E2EConfig.GetVariable(CoreDNSVersionUpgradeTo),
+			WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForKubeProxyUpgrade:     input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForDNSUpgrade:           input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForEtcdUpgrade:          input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+		})
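+		// Note: The upgrader closure only captures this shared configuration; the
+		// per-cluster namespace and cluster name are supplied later by each worker.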
+
+		clusterNamesToUpgrade := []string{}
+		for _, result := range clusterCreateResults {
+			clusterNamesToUpgrade = append(clusterNamesToUpgrade, result.clusterName)
+		}
+
+		// Upgrade all the workload clusters.
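+		// workConcurrentlyAndWait fans the cluster names out to `concurrency` worker
+		// goroutines; each worker drains the shared input channel until it is closed.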
+		_, err = workConcurrentlyAndWait(ctx, workConcurrentlyAndWaitInput{
+			ClusterNames: clusterNamesToUpgrade,
+			Concurrency:  concurrency,
+			FailFast:     input.FailFast,
+			WorkerFunc: func(ctx context.Context, inputChan chan string, resultChan chan workResult, wg *sync.WaitGroup) {
+				upgradeClusterAndWaitWorker(ctx, inputChan, resultChan, wg, namespace.Name, input.DeployClusterInSeparateNamespaces, baseClusterTemplateYAML, upgrader)
+			},
+		})
+		if err != nil {
+			// Call Fail to notify ginkgo that the suite has failed.
+			// Ginkgo will only print the first observed failure in this case.
+			// Example: If clusters c1, c2 and c3 failed, ginkgo will only print the first
+			// observed failure among these 3 clusters.
+			// Since ginkgo only captures one failure, we also log the error, which contains
+			// the full stack trace of the failure for each cluster, to help with debugging.
+			// TODO(ykakarap): Follow-up: Explore options for improved error reporting.
+			log.Logf("Failed to upgrade clusters. Error: %s", err.Error())
+			Fail("")
+		}
+	}
+
 	// TODO(ykakarap): Follow-up: Dump resources for the failed clusters (creation).
 
 	clusterNamesToDelete := []string{}
@@ -494,11 +539,9 @@ func getClusterCreateAndWaitFn(input clusterctl.ApplyCustomClusterTemplateAndWai
 		CustomTemplateYAML:           clusterTemplateYAML,
 		ClusterName:                  clusterName,
 		Namespace:                    namespace,
-		CNIManifestPath:              input.CNIManifestPath,
 		WaitForClusterIntervals:      input.WaitForClusterIntervals,
 		WaitForControlPlaneIntervals: input.WaitForControlPlaneIntervals,
 		WaitForMachineDeployments:    input.WaitForMachineDeployments,
-		WaitForMachinePools:          input.WaitForMachinePools,
 		Args:                         input.Args,
 		PreWaitForCluster:            input.PreWaitForCluster,
 		PostMachinesProvisioned:      input.PostMachinesProvisioned,
@@ -594,7 +637,7 @@ func deleteClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, re
 				return true
 			case clusterName, open := <-inputChan:
 				// Read the cluster name from the channel.
-				// If the channel is closed it implies there is not more work to be done. Return.
+				// If the channel is closed it implies there is no more work to be done. Return.
 				if !open {
 					return true
 				}
@@ -648,6 +691,111 @@ func deleteClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, re
 	}
 }
 
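+// clusterUpgrader upgrades the given workload cluster and blocks until the upgrade is complete.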
+type clusterUpgrader func(ctx context.Context, namespace, clusterName string, clusterTemplateYAML []byte)
+
+func getClusterUpgradeAndWaitFn(input framework.UpgradeClusterTopologyAndWaitForUpgradeInput) clusterUpgrader {
+	return func(ctx context.Context, namespace, clusterName string, clusterTemplateYAML []byte) {
+		resources := getClusterResourcesForUpgrade(ctx, input.ClusterProxy.GetClient(), namespace, clusterName)
+		// Nb. We cannot modify and use `input` directly in this closure because the closure is
+		// called multiple times and would keep mutating the same `input`. It is safer to pass
+		// the values explicitly into a fresh `UpgradeClusterTopologyAndWaitForUpgradeInput`.
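+		// Workers also invoke this closure concurrently, so mutating the shared `input`
+		// would be a data race as well.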
+		framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{
+			ClusterProxy:                input.ClusterProxy,
+			Cluster:                     resources.cluster,
+			ControlPlane:                resources.controlPlane,
+			MachineDeployments:          resources.machineDeployments,
+			KubernetesUpgradeVersion:    input.KubernetesUpgradeVersion,
+			WaitForMachinesToBeUpgraded: input.WaitForMachinesToBeUpgraded,
+			WaitForKubeProxyUpgrade:     input.WaitForKubeProxyUpgrade,
+			WaitForDNSUpgrade:           input.WaitForDNSUpgrade,
+			WaitForEtcdUpgrade:          input.WaitForEtcdUpgrade,
+			// TODO: (killianmuldoon) Checking the kube-proxy, etcd and DNS versions doesn't work as we
+			// can't access the control plane endpoint for the workload cluster from the host.
+			// Need to figure out a way to route the calls to the workload Cluster correctly.
+			EtcdImageTag:       "",
+			DNSImageTag:        "",
+			SkipKubeProxyCheck: true,
+		})
+	}
+}
+
+func upgradeClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, resultChan chan<- workResult, wg *sync.WaitGroup, defaultNamespace string, deployClusterInSeparateNamespaces bool, clusterTemplateYAML []byte, upgrade clusterUpgrader) {
+	defer wg.Done()
+
+	for {
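+		// Each iteration runs in an immediately-invoked closure so that the deferred
+		// result reporting below fires once per cluster rather than once per worker.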
+		done := func() bool {
+			select {
+			case <-ctx.Done():
+				// If the context is cancelled, return and shut down the worker.
+				return true
+			case clusterName, open := <-inputChan:
+				// Read the cluster name from the channel.
+				// If the channel is closed it implies there is no more work to be done. Return.
+				if !open {
+					return true
+				}
+				log.Logf("Upgrading cluster %s", clusterName)
+
+				// This defer will catch ginkgo failures and record them.
+				// The recorded panics are then handled by the parent goroutine.
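+				// (Ginkgo assertion failures panic under the hood; recover() converts such
+				// a panic into a workResult instead of crashing the worker goroutine.)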
+				defer func() {
+					e := recover()
+					resultChan <- workResult{
+						clusterName: clusterName,
+						err:         e,
+					}
+				}()
+
+				// Calculate namespace.
+				namespaceName := defaultNamespace
+				if deployClusterInSeparateNamespaces {
+					namespaceName = clusterName
+				}
+				upgrade(ctx, namespaceName, clusterName, clusterTemplateYAML)
+				return false
+			}
+		}()
+		if done {
+			break
+		}
+	}
+}
+
+type clusterResources struct {
+	cluster            *clusterv1.Cluster
+	machineDeployments []*clusterv1.MachineDeployment
+	controlPlane       *controlplanev1.KubeadmControlPlane
+}
+
+func getClusterResourcesForUpgrade(ctx context.Context, c client.Client, namespace, clusterName string) clusterResources {
+	cluster := &clusterv1.Cluster{}
+	err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting Cluster %s: %s", klog.KRef(namespace, clusterName), err))
+
+	controlPlane := &controlplanev1.KubeadmControlPlane{}
+	err = c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: cluster.Spec.ControlPlaneRef.Name}, controlPlane)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting ControlPlane for Cluster %s: %s", klog.KObj(cluster), err))
+
+	mds := []*clusterv1.MachineDeployment{}
+	machineDeployments := &clusterv1.MachineDeploymentList{}
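+	// Select only the MachineDeployments that belong to this Cluster and are
+	// owned by the Cluster topology (i.e. managed via ClusterClass).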
+	err = c.List(ctx, machineDeployments,
+		client.MatchingLabels{
+			clusterv1.ClusterNameLabel:          cluster.Name,
+			clusterv1.ClusterTopologyOwnedLabel: "",
+		},
+		client.InNamespace(cluster.Namespace),
+	)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting MachineDeployments for Cluster %s: %s", klog.KObj(cluster), err))
+	for _, md := range machineDeployments.Items {
+		mds = append(mds, md.DeepCopy())
+	}
+
+	return clusterResources{
+		cluster:            cluster,
+		machineDeployments: mds,
+		controlPlane:       controlPlane,
+	}
+}
+
 type workResult struct {
 	clusterName string
 	err         any