@@ -41,6 +41,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
 	"sigs.k8s.io/cluster-api/test/e2e/internal/log"
 	"sigs.k8s.io/cluster-api/test/framework"
 	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
@@ -123,6 +124,9 @@ type scaleSpecInput struct {
 	// if the test suite should fail as soon as c6 fails or if it should fail after all cluster creations are done.
 	FailFast bool
 
+	// SkipUpgrade if set to true will skip upgrading the workload clusters.
+	SkipUpgrade bool
+
 	// SkipCleanup if set to true will skip deleting the workload clusters.
 	SkipCleanup bool
 
@@ -133,7 +137,7 @@ type scaleSpecInput struct {
 	SkipWaitForCreation bool
 }
 
-// scaleSpec implements a scale test.
+// scaleSpec implements a scale test for clusters with MachineDeployments.
 func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 	var (
 		specName = "scale"
@@ -253,7 +257,7 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 		Flavor: flavor,
 		Namespace: scaleClusterNamespacePlaceholder,
 		ClusterName: scaleClusterNamePlaceholder,
-		KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion),
+		KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeFrom),
 		ControlPlaneMachineCount: controlPlaneMachineCount,
 		WorkerMachineCount: workerMachineCount,
 	})
@@ -333,6 +337,47 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
 		Fail("")
 	}
 
+	if !input.SkipUpgrade {
+		By("Upgrade the workload clusters concurrently")
+		// Get the upgrade function for upgrading the workload clusters.
+		upgrader := getClusterUpgradeAndWaitFn(framework.UpgradeClusterTopologyAndWaitForUpgradeInput{
+			ClusterProxy: input.BootstrapClusterProxy,
+			KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo),
+			EtcdImageTag: input.E2EConfig.GetVariable(EtcdVersionUpgradeTo),
+			DNSImageTag: input.E2EConfig.GetVariable(CoreDNSVersionUpgradeTo),
+			WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+			WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"),
+		})
+
+		clusterNamesToUpgrade := []string{}
+		for _, result := range clusterCreateResults {
+			clusterNamesToUpgrade = append(clusterNamesToUpgrade, result.clusterName)
+		}
+
+		// Upgrade all the workload clusters.
+		_, err = workConcurrentlyAndWait(ctx, workConcurrentlyAndWaitInput{
+			ClusterNames: clusterNamesToUpgrade,
+			Concurrency: concurrency,
+			FailFast: input.FailFast,
+			WorkerFunc: func(ctx context.Context, inputChan chan string, resultChan chan workResult, wg *sync.WaitGroup) {
+				upgradeClusterAndWaitWorker(ctx, inputChan, resultChan, wg, namespace.Name, input.DeployClusterInSeparateNamespaces, baseClusterTemplateYAML, upgrader)
+			},
+		})
+		if err != nil {
+			// Call Fail to notify ginkgo that the suite has failed.
+			// Ginkgo will only print the first observed failure in this case.
+			// Example: If clusters c1, c2 and c3 failed, ginkgo will only print the first
+			// observed failure among these 3 clusters.
+			// Since ginkgo only captures one failure, we also log the error, which contains
+			// the full stack trace of the failure for each cluster, to help with debugging.
+			// TODO(ykakarap): Follow-up: Explore options for improved error reporting.
+			log.Logf("Failed to upgrade clusters. Error: %s", err.Error())
+			Fail("")
+		}
+	}
+
 	// TODO(ykakarap): Follow-up: Dump resources for the failed clusters (creation).
 
 	clusterNamesToDelete := []string{}
@@ -494,11 +539,9 @@ func getClusterCreateAndWaitFn(input clusterctl.ApplyCustomClusterTemplateAndWai
 			CustomTemplateYAML: clusterTemplateYAML,
 			ClusterName: clusterName,
 			Namespace: namespace,
-			CNIManifestPath: input.CNIManifestPath,
 			WaitForClusterIntervals: input.WaitForClusterIntervals,
 			WaitForControlPlaneIntervals: input.WaitForControlPlaneIntervals,
 			WaitForMachineDeployments: input.WaitForMachineDeployments,
-			WaitForMachinePools: input.WaitForMachinePools,
 			Args: input.Args,
 			PreWaitForCluster: input.PreWaitForCluster,
 			PostMachinesProvisioned: input.PostMachinesProvisioned,
@@ -594,7 +637,7 @@ func deleteClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, re
				return true
			case clusterName, open := <-inputChan:
				// Read the cluster name from the channel.
-				// If the channel is closed it implies there is not more work to be done. Return.
+				// If the channel is closed it implies there is no more work to be done. Return.
				if !open {
					return true
				}
@@ -648,6 +691,111 @@ func deleteClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, re
 	}
 }
 
+type clusterUpgrader func(ctx context.Context, namespace, clusterName string, clusterTemplateYAML []byte)
+
+func getClusterUpgradeAndWaitFn(input framework.UpgradeClusterTopologyAndWaitForUpgradeInput) clusterUpgrader {
+	return func(ctx context.Context, namespace, clusterName string, clusterTemplateYAML []byte) {
+		resources := getClusterResourcesForUpgrade(ctx, input.ClusterProxy.GetClient(), namespace, clusterName)
+		// Nb. We cannot directly modify and use `input` in this closure function because this function
+		// will be called multiple times and this closure will keep modifying the same `input` multiple
+		// times. It is safer to pass the values explicitly into `UpgradeClusterTopologyAndWaitForUpgradeInput`.
+		framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{
+			ClusterProxy: input.ClusterProxy,
+			Cluster: resources.cluster,
+			ControlPlane: resources.controlPlane,
+			MachineDeployments: resources.machineDeployments,
+			KubernetesUpgradeVersion: input.KubernetesUpgradeVersion,
+			WaitForMachinesToBeUpgraded: input.WaitForMachinesToBeUpgraded,
+			WaitForKubeProxyUpgrade: input.WaitForKubeProxyUpgrade,
+			WaitForDNSUpgrade: input.WaitForDNSUpgrade,
+			WaitForEtcdUpgrade: input.WaitForEtcdUpgrade,
+			// TODO: (killianmuldoon) Checking the kube-proxy, etcd and DNS version doesn't work as we can't access the control plane endpoint for the workload cluster
+			// from the host. Need to figure out a way to route the calls to the workload Cluster correctly.
+			EtcdImageTag: "",
+			DNSImageTag: "",
+			SkipKubeProxyCheck: true,
+		})
+	}
+}
+
+func upgradeClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, resultChan chan<- workResult, wg *sync.WaitGroup, defaultNamespace string, deployClusterInSeparateNamespaces bool, clusterTemplateYAML []byte, upgrade clusterUpgrader) {
+	defer wg.Done()
+
+	for {
+		done := func() bool {
+			select {
+			case <-ctx.Done():
+				// If the context is cancelled, return and shut down the worker.
+				return true
+			case clusterName, open := <-inputChan:
+				// Read the cluster name from the channel.
+				// If the channel is closed it implies there is no more work to be done. Return.
+				if !open {
+					return true
+				}
+				log.Logf("Upgrading cluster %s", clusterName)
+
+				// This defer will catch ginkgo failures and record them.
+				// The recorded panics are then handled by the parent goroutine.
+				defer func() {
+					e := recover()
+					resultChan <- workResult{
+						clusterName: clusterName,
+						err: e,
+					}
+				}()
+
+				// Calculate namespace.
+				namespaceName := defaultNamespace
+				if deployClusterInSeparateNamespaces {
+					namespaceName = clusterName
+				}
+				upgrade(ctx, namespaceName, clusterName, clusterTemplateYAML)
+				return false
+			}
+		}()
+		if done {
+			break
+		}
+	}
+}
+
+type clusterResources struct {
+	cluster *clusterv1.Cluster
+	machineDeployments []*clusterv1.MachineDeployment
+	controlPlane *controlplanev1.KubeadmControlPlane
+}
+
+func getClusterResourcesForUpgrade(ctx context.Context, c client.Client, namespace, clusterName string) clusterResources {
+	cluster := &clusterv1.Cluster{}
+	err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: clusterName}, cluster)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting Cluster %s: %s", klog.KRef(namespace, clusterName), err))
+
+	controlPlane := &controlplanev1.KubeadmControlPlane{}
+	err = c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: cluster.Spec.ControlPlaneRef.Name}, controlPlane)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting ControlPlane for Cluster %s: %s", klog.KObj(cluster), err))
+
+	mds := []*clusterv1.MachineDeployment{}
+	machineDeployments := &clusterv1.MachineDeploymentList{}
+	err = c.List(ctx, machineDeployments,
+		client.MatchingLabels{
+			clusterv1.ClusterNameLabel: cluster.Name,
+			clusterv1.ClusterTopologyOwnedLabel: "",
+		},
+		client.InNamespace(cluster.Namespace),
+	)
+	Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error getting MachineDeployments for Cluster %s: %s", klog.KObj(cluster), err))
+	for _, md := range machineDeployments.Items {
+		mds = append(mds, md.DeepCopy())
+	}
+
+	return clusterResources{
+		cluster: cluster,
+		machineDeployments: mds,
+		controlPlane: controlPlane,
+	}
+}
+
 type workResult struct {
 	clusterName string
 	err any
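Aside from the diff itself, upgradeClusterAndWaitWorker (like the existing deleteClusterAndWaitWorker) follows a select/defer-recover worker pattern that can be easy to misread in diff form. Below is a minimal, self-contained sketch of that pattern in isolation; it is illustrative only and not part of this change. The names worker, result, and the wiring in main are assumptions made for the sketch, not cluster-api identifiers.

package main

import (
	"context"
	"fmt"
	"sync"
)

// result mirrors the role of workResult above: one entry per processed item,
// carrying any panic recovered while handling it.
type result struct {
	name string
	err  any
}

// worker mirrors the control flow of upgradeClusterAndWaitWorker: loop, select
// on cancellation or the input channel, stop when the channel is closed, and
// report a result (including recovered panics) for every item it picks up.
func worker(ctx context.Context, in <-chan string, out chan<- result, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		done := func() bool {
			select {
			case <-ctx.Done():
				// Context cancelled: shut the worker down.
				return true
			case name, open := <-in:
				// A closed channel means there is no more work to be done.
				if !open {
					return true
				}
				// The deferred send runs when this closure returns, so a result
				// (and any recovered panic) is reported exactly once per item.
				defer func() {
					out <- result{name: name, err: recover()}
				}()
				fmt.Println("processing", name)
				return false
			}
		}()
		if done {
			return
		}
	}
}

func main() {
	in := make(chan string, 3)
	out := make(chan result, 3)

	var wg sync.WaitGroup
	wg.Add(1)
	go worker(context.Background(), in, out, &wg)

	for _, n := range []string{"c1", "c2", "c3"} {
		in <- n
	}
	close(in)

	wg.Wait()
	close(out)
	for r := range out {
		fmt.Printf("%s: err=%v\n", r.name, r.err)
	}
}

The per-item closure returning a bool exists so that the deferred result send fires after each item rather than piling up until the worker exits, which is the same reason the code in the diff wraps each channel read in an immediately invoked function.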