@@ -411,7 +411,7 @@ func TestMultiNodeHADisasterRecovery(t *testing.T) {
411411
412412 tc := docker .NewCluster (& docker.ClusterInput {
413413 T : t ,
414- Nodes : 3 ,
414+ Nodes : 4 ,
415415 Distro : "debian-bookworm" ,
416416 LicensePath : "licenses/snapshot-license.yaml" ,
417417 ECBinaryPath : "../output/bin/embedded-cluster" ,
@@ -424,14 +424,17 @@ func TestMultiNodeHADisasterRecovery(t *testing.T) {
424424 t .Fatalf ("fail to run playwright test deploy-app: %v: %s: %s" , err , stdout , stderr )
425425 }
426426
427+ // join a worker
428+ joinWorkerNode (t , tc , 1 )
429+
427430 // join a controller
428- joinControllerNode (t , tc , 1 )
431+ joinControllerNode (t , tc , 2 )
429432
430433 // join another controller in HA mode
431- joinControllerNodeWithOptions (t , tc , 2 , joinOptions {isHA : true })
434+ joinControllerNodeWithOptions (t , tc , 3 , joinOptions {isHA : true })
432435
433436 // wait for the nodes to report as ready.
434- waitForNodes (t , tc , 3 , nil )
437+ waitForNodes (t , tc , 4 , nil )
435438
436439 t .Logf ("%s: checking installation state after enabling high availability" , time .Now ().Format (time .RFC3339 ))
437440 line := []string {"check-post-ha-state.sh" , os .Getenv ("SHORT_SHA" ), k8sVersion ()}
@@ -443,24 +446,46 @@ func TestMultiNodeHADisasterRecovery(t *testing.T) {
443446 t .Fatalf ("fail to run playwright test create-backup: %v: %s: %s" , err , stdout , stderr )
444447 }
445448
449+ bin := "embedded-cluster"
450+ t .Logf ("%s: resetting controller node 0" , time .Now ().Format (time .RFC3339 ))
451+ stdout , stderr , err := tc .RunCommandOnNode (0 , []string {bin , "reset" , "--yes" })
452+ if err != nil {
453+ t .Fatalf ("fail to remove controller node 0: %v: %s: %s" , err , stdout , stderr )
454+ }
455+ if ! strings .Contains (stdout , "High-availability is enabled and requires at least three controller-test nodes" ) {
456+ t .Errorf ("reset output does not contain the ha warning" )
457+ t .Logf ("stdout: %s\n stderr: %s" , stdout , stderr )
458+ }
459+
460+ stdout , stderr , err = tc .RunCommandOnNode (2 , []string {"check-nodes-removed.sh" , "3" })
461+ if err != nil {
462+ t .Fatalf ("fail to check nodes removed: %v: %s: %s" , err , stdout , stderr )
463+ }
464+
465+ t .Logf ("%s: checking nllb" , time .Now ().Format (time .RFC3339 ))
466+ line = []string {"check-nllb.sh" }
467+ if stdout , stderr , err := tc .RunCommandOnNode (2 , line ); err != nil {
468+ t .Fatalf ("fail to check nllb: %v: %s: %s" , err , stdout , stderr )
469+ }
470+
446471 // reset the cluster
447472 runInParallel (t ,
448473 func (t * testing.T ) error {
449- stdout , stderr , err := resetInstallationWithError (t , tc , 2 , resetInstallationOptions {force : true })
474+ stdout , stderr , err := resetInstallationWithError (t , tc , 3 , resetInstallationOptions {force : true })
450475 if err != nil {
451- return fmt .Errorf ("fail to reset the installation on node 2 : %v: %s: %s" , err , stdout , stderr )
476+ return fmt .Errorf ("fail to reset the installation on node 3 : %v: %s: %s" , err , stdout , stderr )
452477 }
453478 return nil
454479 }, func (t * testing.T ) error {
455- stdout , stderr , err := resetInstallationWithError (t , tc , 1 , resetInstallationOptions {force : true })
480+ stdout , stderr , err := resetInstallationWithError (t , tc , 2 , resetInstallationOptions {force : true })
456481 if err != nil {
457- return fmt .Errorf ("fail to reset the installation on node 1 : %v: %s: %s" , err , stdout , stderr )
482+ return fmt .Errorf ("fail to reset the installation on node 2 : %v: %s: %s" , err , stdout , stderr )
458483 }
459484 return nil
460485 }, func (t * testing.T ) error {
461- stdout , stderr , err := resetInstallationWithError (t , tc , 0 , resetInstallationOptions {force : true })
486+ stdout , stderr , err := resetInstallationWithError (t , tc , 1 , resetInstallationOptions {force : true })
462487 if err != nil {
463- return fmt .Errorf ("fail to reset the installation on node 0 : %v: %s: %s" , err , stdout , stderr )
488+ return fmt .Errorf ("fail to reset the installation on node 1 : %v: %s: %s" , err , stdout , stderr )
464489 }
465490 return nil
466491 },
@@ -479,14 +504,17 @@ func TestMultiNodeHADisasterRecovery(t *testing.T) {
479504 // restore phase 1 completes when the prompt for adding nodes is reached.
480505 // add the expected nodes to the cluster, then continue to phase 2.
481506
507+ // join a worker
508+ joinWorkerNode (t , tc , 1 )
509+
482510 // join a controller
483- joinControllerNodeWithOptions (t , tc , 1 , joinOptions {isRestore : true })
511+ joinControllerNodeWithOptions (t , tc , 2 , joinOptions {isRestore : true })
484512
485513 // join another controller in non-HA mode
486- joinControllerNodeWithOptions (t , tc , 2 , joinOptions {isRestore : true })
514+ joinControllerNodeWithOptions (t , tc , 3 , joinOptions {isRestore : true })
487515
488516 // wait for the nodes to report as ready.
489- waitForNodes (t , tc , 3 , nil , "true" )
517+ waitForNodes (t , tc , 4 , nil , "true" )
490518
491519 t .Logf ("%s: restoring the installation: phase 2" , time .Now ().Format (time .RFC3339 ))
492520 if stdout , stderr , err := tc .RunCommandOnNode (0 , []string {"restore-multi-node-phase2.exp" }); err != nil {
@@ -808,13 +836,8 @@ func TestMultiNodeAirgapHADisasterRecovery(t *testing.T) {
808836 t .Fatalf ("fail to run playwright test deploy-upgrade: %v: %s: %s" , err , stdout , stderr )
809837 }
810838
811- postUpgradeEnv := make (map [string ]string )
812- for k , v := range withEnv {
813- postUpgradeEnv [k ] = v
814- }
815- postUpgradeEnv ["ALLOW_PENDING_PODS" ] = "true"
816839 checkPostUpgradeStateWithOptions (t , tc , postUpgradeStateOptions {
817- withEnv : postUpgradeEnv ,
840+ withEnv : withEnv ,
818841 })
819842
820843 t .Logf ("%s: test complete" , time .Now ().Format (time .RFC3339 ))
0 commit comments