@@ -20,10 +20,12 @@ import (
2020 "bytes"
2121 "context"
2222 "fmt"
23+ "math"
2324 "os"
2425 "path/filepath"
2526 "strconv"
2627 "sync"
28+ "time"
2729
2830 . "github.com/onsi/ginkgo/v2"
2931 "github.com/onsi/ginkgo/v2/types"
@@ -32,7 +34,9 @@ import (
3234 corev1 "k8s.io/api/core/v1"
3335 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3436 "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
37+ "k8s.io/apimachinery/pkg/runtime"
3538 kerrors "k8s.io/apimachinery/pkg/util/errors"
39+ "k8s.io/klog/v2"
3640 "k8s.io/utils/pointer"
3741 "sigs.k8s.io/controller-runtime/pkg/client"
3842
@@ -48,6 +52,7 @@ const (
4852 scaleConcurrency = "CAPI_SCALE_CONCURRENCY"
4953 scaleControlPlaneMachineCount = "CAPI_SCALE_CONTROL_PLANE_MACHINE_COUNT"
5054 scaleWorkerMachineCount = "CAPI_SCALE_WORKER_MACHINE_COUNT"
55+ scaleMachineDeploymentCount = "CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT"
5156)
5257
5358// scaleSpecInput is the input for scaleSpec.
@@ -66,6 +71,7 @@ type scaleSpecInput struct {
6671
6772 // Flavor, if specified is the template flavor used to create the cluster for testing.
6873 // If not specified, the default flavor for the selected infrastructure provider is used.
74+ // The ClusterTopology of this flavor should have exactly one MachineDeployment.
6975 Flavor * string
7076
7177 // ClusterCount is the number of target workload clusters.
@@ -83,11 +89,23 @@ type scaleSpecInput struct {
8390 // Can be overridden by variable CAPI_SCALE_CONTROL_PLANE_MACHINE_COUNT.
8490 ControlPlaneMachineCount * int64
8591
86- // WorkerMachineCount defines number of worker machines to be added to each workload cluster.
92+ // WorkerMachineCount defines number of worker machines per machine deployment of the workload cluster.
8793 // If not specified, 1 will be used.
8894 // Can be overridden by variable CAPI_SCALE_WORKER_MACHINE_COUNT.
95+ // The resulting number of worker nodes for each of the workload cluster will
96+ // be MachineDeploymentCount*WorkerMachineCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_MACHINE_COUNT).
8997 WorkerMachineCount * int64
9098
99+ // MachineDeploymentCount defines the number of MachineDeployments to be used per workload cluster.
100+ // If not specified, 1 will be used.
101+ // Can be overridden by variable CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT.
102+ // Note: This assumes that the cluster template of the specified flavor has exactly one machine deployment.
103+ // It uses this machine deployment to create additional copies.
104+ // Names of the MachineDeployments will be overridden to "md-1", "md-2", etc.,
104+ // zero-padded to the width of the largest index (e.g. "md-01" ... "md-10" for a two-digit count).
105+ // The resulting number of worker nodes for each of the workload cluster will
106+ // be MachineDeploymentCount*WorkerMachineCount (CAPI_SCALE_MACHINE_DEPLOYMENT_COUNT*CAPI_SCALE_WORKER_MACHINE_COUNT).
107+ MachineDeploymentCount * int64
108+
91109 // FailFast if set to true will return immediately after the first cluster operation fails.
92110 // If set to false, the test suite will not exit immediately after the first cluster operation fails.
93111 // Example: When creating clusters from c1 to c20 consider c6 fails creation. If FailFast is set to true
@@ -96,6 +114,15 @@ type scaleSpecInput struct {
96114 // Note: Please note that the test suite will still fail since c6 creation failed. FailFast will determine
97115 // if the test suite should fail as soon as c6 fails or if it should fail after all cluster creations are done.
98116 FailFast bool
117+
118+ // SkipCleanup if set to true will skip deleting the workload clusters.
119+ SkipCleanup bool
120+
121+ // SkipWaitForCreation defines if the test should wait for the workload clusters to be fully provisioned
122+ // before moving on.
123+ // If set to true, the test will create the workload clusters and immediately continue without waiting
124+ // for the clusters to be fully provisioned.
125+ SkipWaitForCreation bool
99126}
100127
101128// scaleSpec implements a scale test.
@@ -118,7 +145,17 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
118145 Expect (input .E2EConfig .Variables ).To (HaveKey (KubernetesVersion ))
119146
120147 // Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
121- namespace , cancelWatches = setupSpecNamespace (ctx , specName , input .BootstrapClusterProxy , input .ArtifactFolder )
148+ // We are pinning the namespace for the test to help with debugging and testing.
149+ // Example: Queries to look up state of the clusters can be re-used.
150+ // Since we don't run multiple instances of this test concurrently on a management cluster it is okay to pin the namespace.
151+ Byf ("Creating a namespace for hosting the %q test spec" , specName )
152+ namespace , cancelWatches = framework .CreateNamespaceAndWatchEvents (ctx , framework.CreateNamespaceAndWatchEventsInput {
153+ Creator : input .BootstrapClusterProxy .GetClient (),
154+ ClientSet : input .BootstrapClusterProxy .GetClientSet (),
155+ Name : specName ,
156+ LogFolder : filepath .Join (input .ArtifactFolder , "clusters" , input .BootstrapClusterProxy .GetName ()),
157+ IgnoreAlreadyExists : true ,
158+ })
122159 })
123160
124161 It ("Should create and delete workload clusters" , func () {
@@ -156,6 +193,18 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
156193 workerMachineCount = pointer .Int64 (int64 (workerMachineCountInt ))
157194 }
158195
196+ machineDeploymentCount := pointer .Int64 (1 )
197+ if input .MachineDeploymentCount != nil {
198+ machineDeploymentCount = input .MachineDeploymentCount
199+ }
200+ // If variable is defined that will take precedence.
201+ if input .E2EConfig .HasVariable (scaleMachineDeploymentCount ) {
202+ machineDeploymentCountStr := input .E2EConfig .GetVariable (scaleMachineDeploymentCount )
203+ machineDeploymentCountInt , err := strconv .Atoi (machineDeploymentCountStr )
204+ Expect (err ).NotTo (HaveOccurred ())
205+ machineDeploymentCount = pointer .Int64 (int64 (machineDeploymentCountInt ))
206+ }
207+
159208 clusterCount := int64 (10 )
160209 if input .ClusterCount != nil {
161210 clusterCount = * input .ClusterCount
@@ -211,33 +260,55 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
211260 log .Logf ("Extract ClusterClass and Cluster from template YAML" )
212261 clusterClassYAML , baseClusterTemplateYAML := extractClusterClassAndClusterFromTemplate (baseWorkloadClusterTemplate )
213262
214- // Apply the ClusterClass.
215- log .Logf ("Create ClusterClass" )
216- Eventually (func () error {
217- return input .BootstrapClusterProxy .Apply (ctx , clusterClassYAML , "-n" , namespace .Name )
218- }).Should (Succeed ())
263+ // Modify the baseClusterTemplateYAML so that it has the desired number of machine deployments.
264+ modifiedBaseTemplateYAML := modifyMachineDeployments (baseClusterTemplateYAML , int (* machineDeploymentCount ))
265+
266+ if len (clusterClassYAML ) > 0 {
267+ // Apply the ClusterClass.
268+ log .Logf ("Create ClusterClass" )
269+ Eventually (func () error {
270+ return input .BootstrapClusterProxy .Apply (ctx , clusterClassYAML )
271+ }).Should (Succeed ())
272+ } else {
273+ log .Logf ("ClusterClass already exists. Skipping creation." )
274+ }
219275
220276 By ("Create workload clusters concurrently" )
221277 // Create multiple clusters concurrently from the same base cluster template.
222278
223279 clusterNames := make ([]string , 0 , clusterCount )
280+ clusterNameDigits := 1 + int (math .Log10 (float64 (clusterCount )))
224281 for i := int64 (1 ); i <= clusterCount ; i ++ {
225- name := fmt .Sprintf ("%s-%d" , specName , i )
282+ // This ensures we always have the right number of leading zeros in our cluster names, e.g.
283+ // clusterCount=1000 will lead to cluster names like scale-0001, scale-0002, ... .
284+ // This makes it possible to have correct ordering of clusters in diagrams in tools like Grafana.
285+ name := fmt .Sprintf ("%s-%0*d" , specName , clusterNameDigits , i )
226286 clusterNames = append (clusterNames , name )
227287 }
228288
289+ // Use the correct creator function for creating the workload clusters.
290+ // Default to using the "create and wait" creator function. If SkipWaitForCreation=true then
291+ // use the "create only" creator function.
292+ creator := getClusterCreateAndWaitFn (clusterctl.ApplyCustomClusterTemplateAndWaitInput {
293+ ClusterProxy : input .BootstrapClusterProxy ,
294+ Namespace : namespace .Name ,
295+ WaitForClusterIntervals : input .E2EConfig .GetIntervals (specName , "wait-cluster" ),
296+ WaitForControlPlaneIntervals : input .E2EConfig .GetIntervals (specName , "wait-control-plane" ),
297+ WaitForMachineDeployments : input .E2EConfig .GetIntervals (specName , "wait-worker-nodes" ),
298+ })
299+ if input .SkipWaitForCreation {
300+ if ! input .SkipCleanup {
301+ log .Logf ("WARNING! Using SkipWaitForCreation=true while SkipCleanup=false can lead to workload clusters getting deleted before they are fully provisioned." )
302+ }
303+ creator = getClusterCreateFn (input .BootstrapClusterProxy , namespace .Name )
304+ }
305+
229306 clusterCreateResults , err := workConcurrentlyAndWait (ctx , workConcurrentlyAndWaitInput {
230307 ClusterNames : clusterNames ,
231308 Concurrency : concurrency ,
232309 FailFast : input .FailFast ,
233310 WorkerFunc : func (ctx context.Context , inputChan chan string , resultChan chan workResult , wg * sync.WaitGroup ) {
234- createClusterAndWaitWorker (ctx , inputChan , resultChan , wg , baseClusterTemplateYAML , baseClusterName , clusterctl.ApplyCustomClusterTemplateAndWaitInput {
235- ClusterProxy : input .BootstrapClusterProxy ,
236- Namespace : namespace .Name ,
237- WaitForClusterIntervals : input .E2EConfig .GetIntervals (specName , "wait-cluster" ),
238- WaitForControlPlaneIntervals : input .E2EConfig .GetIntervals (specName , "wait-control-plane" ),
239- WaitForMachineDeployments : input .E2EConfig .GetIntervals (specName , "wait-worker-nodes" ),
240- })
311+ createClusterWorker (ctx , inputChan , resultChan , wg , modifiedBaseTemplateYAML , baseClusterName , creator )
241312 },
242313 })
243314 if err != nil {
@@ -259,6 +330,10 @@ func scaleSpec(ctx context.Context, inputGetter func() scaleSpecInput) {
259330 clusterNamesToDelete = append (clusterNamesToDelete , result .clusterName )
260331 }
261332
333+ if input .SkipCleanup {
334+ return
335+ }
336+
262337 By ("Delete the workload clusters concurrently" )
263338 // Now delete all the workload clusters.
264339 _ , err = workConcurrentlyAndWait (ctx , workConcurrentlyAndWaitInput {
@@ -396,7 +471,42 @@ outer:
396471 return results , kerrors .NewAggregate (errs )
397472}
398473
399- func createClusterAndWaitWorker (ctx context.Context , inputChan <- chan string , resultChan chan <- workResult , wg * sync.WaitGroup , baseTemplate []byte , baseClusterName string , input clusterctl.ApplyCustomClusterTemplateAndWaitInput ) {
// clusterCreator abstracts how a single workload cluster is created from its
// rendered template YAML, so the worker loop can be wired up with either a
// create-and-wait or a create-only implementation.
type clusterCreator func(ctx context.Context, clusterName string, clusterTemplateYAML []byte)
475+
476+ func getClusterCreateAndWaitFn (input clusterctl.ApplyCustomClusterTemplateAndWaitInput ) clusterCreator {
477+ return func (ctx context.Context , clusterName string , clusterTemplateYAML []byte ) {
478+ clusterResources := & clusterctl.ApplyCustomClusterTemplateAndWaitResult {}
479+ // Nb. We cannot directly modify and use `input` in this closure function because this function
480+ // will be called multiple times and this closure will keep modifying the same `input` multiple
481+ // times. It is safer to pass the values explicitly into `ApplyCustomClusterTemplateAndWait`.
482+ clusterctl .ApplyCustomClusterTemplateAndWait (ctx , clusterctl.ApplyCustomClusterTemplateAndWaitInput {
483+ ClusterProxy : input .ClusterProxy ,
484+ CustomTemplateYAML : clusterTemplateYAML ,
485+ ClusterName : clusterName ,
486+ Namespace : input .Namespace ,
487+ CNIManifestPath : input .CNIManifestPath ,
488+ WaitForClusterIntervals : input .WaitForClusterIntervals ,
489+ WaitForControlPlaneIntervals : input .WaitForControlPlaneIntervals ,
490+ WaitForMachineDeployments : input .WaitForMachineDeployments ,
491+ WaitForMachinePools : input .WaitForMachinePools ,
492+ Args : input .Args ,
493+ PreWaitForCluster : input .PreWaitForCluster ,
494+ PostMachinesProvisioned : input .PostMachinesProvisioned ,
495+ ControlPlaneWaiters : input .ControlPlaneWaiters ,
496+ }, clusterResources )
497+ }
498+ }
499+
500+ func getClusterCreateFn (clusterProxy framework.ClusterProxy , namespace string ) clusterCreator {
501+ return func (ctx context.Context , clusterName string , clusterTemplateYAML []byte ) {
502+ log .Logf ("Applying the cluster template yaml of cluster %s" , klog .KRef (namespace , clusterName ))
503+ Eventually (func () error {
504+ return clusterProxy .Apply (ctx , clusterTemplateYAML )
505+ }, 10 * time .Second ).Should (Succeed (), "Failed to apply the cluster template of cluster %s" , klog .KRef (namespace , clusterName ))
506+ }
507+ }
508+
509+ func createClusterWorker (ctx context.Context , inputChan <- chan string , resultChan chan <- workResult , wg * sync.WaitGroup , baseTemplate []byte , baseClusterName string , create clusterCreator ) {
400510 defer wg .Done ()
401511
402512 for {
@@ -425,13 +535,7 @@ func createClusterAndWaitWorker(ctx context.Context, inputChan <-chan string, re
425535
426536 // Create the cluster template YAML with the target cluster name.
427537 clusterTemplateYAML := bytes .Replace (baseTemplate , []byte (baseClusterName ), []byte (clusterName ), - 1 )
428- // Nb. Input is passed as a copy therefore we can safely update the value here, and it won't affect other
429- // workers.
430- input .CustomTemplateYAML = clusterTemplateYAML
431- input .ClusterName = clusterName
432-
433- clusterResources := & clusterctl.ApplyCustomClusterTemplateAndWaitResult {}
434- clusterctl .ApplyCustomClusterTemplateAndWait (ctx , input , clusterResources )
538+ create (ctx , clusterName , clusterTemplateYAML )
435539 return false
436540 }
437541 }()
@@ -495,3 +599,39 @@ type workResult struct {
495599 clusterName string
496600 err any
497601}
602+
603+ func modifyMachineDeployments (baseClusterTemplateYAML []byte , count int ) []byte {
604+ Expect (baseClusterTemplateYAML ).NotTo (BeEmpty (), "Invalid argument. baseClusterTemplateYAML cannot be empty when calling modifyMachineDeployments" )
605+ Expect (count ).To (BeNumerically (">=" , 0 ), "Invalid argument. count cannot be less than 0 when calling modifyMachineDeployments" )
606+
607+ objs , err := yaml .ToUnstructured (baseClusterTemplateYAML )
608+ Expect (err ).NotTo (HaveOccurred ())
609+ Expect (objs ).To (HaveLen (1 ), "Unexpected number of objects found in baseClusterTemplateYAML" )
610+
611+ scheme := runtime .NewScheme ()
612+ framework .TryAddDefaultSchemes (scheme )
613+ cluster := & clusterv1.Cluster {}
614+ Expect (scheme .Convert (& objs [0 ], cluster , nil )).Should (Succeed ())
615+ // Verify the Cluster Topology.
616+ Expect (cluster .Spec .Topology ).NotTo (BeNil (), "Should be a ClusterClass based Cluster" )
617+ Expect (cluster .Spec .Topology .Workers ).NotTo (BeNil (), "ClusterTopology should have exactly one MachineDeployment. Cannot be empty" )
618+ Expect (cluster .Spec .Topology .Workers .MachineDeployments ).To (HaveLen (1 ), "ClusterTopology should have exactly one MachineDeployment" )
619+
620+ baseMD := cluster .Spec .Topology .Workers .MachineDeployments [0 ]
621+ allMDs := make ([]clusterv1.MachineDeploymentTopology , count )
622+ allMDDigits := 1 + int (math .Log10 (float64 (count )))
623+ for i := 1 ; i <= count ; i ++ {
624+ md := baseMD .DeepCopy ()
625+ // This ensures we always have the right number of leading zeros in our machine deployment names, e.g.
626+ // count=1000 will lead to machine deployment names like md-0001, md-0002, so on.
627+ md .Name = fmt .Sprintf ("md-%0*d" , allMDDigits , i )
628+ allMDs [i - 1 ] = * md
629+ }
630+ cluster .Spec .Topology .Workers .MachineDeployments = allMDs
631+ u := & unstructured.Unstructured {}
632+ Expect (scheme .Convert (cluster , u , nil )).To (Succeed ())
633+ modifiedClusterYAML , err := yaml .FromUnstructured ([]unstructured.Unstructured {* u })
634+ Expect (err ).NotTo (HaveOccurred ())
635+
636+ return modifiedClusterYAML
637+ }
0 commit comments