@@ -18,16 +18,20 @@ import (
1818 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1919 "k8s.io/apimachinery/pkg/fields"
2020 "k8s.io/apimachinery/pkg/labels"
21+ "k8s.io/apimachinery/pkg/runtime"
22+ "k8s.io/apimachinery/pkg/runtime/schema"
2123 "k8s.io/apimachinery/pkg/util/sets"
2224 "k8s.io/apimachinery/pkg/util/wait"
2325 "k8s.io/apimachinery/pkg/watch"
26+ "k8s.io/client-go/dynamic"
2427 "k8s.io/client-go/kubernetes"
2528 "k8s.io/client-go/rest"
2629 "k8s.io/client-go/tools/cache"
2730 "k8s.io/client-go/tools/clientcmd"
2831 clientwatch "k8s.io/client-go/tools/watch"
2932
3033 configv1 "github.com/openshift/api/config/v1"
34+ operatorv1 "github.com/openshift/api/operator/v1"
3135 configclient "github.com/openshift/client-go/config/clientset/versioned"
3236 configinformers "github.com/openshift/client-go/config/informers/externalversions"
3337 configlisters "github.com/openshift/client-go/config/listers/config/v1"
@@ -431,7 +435,15 @@ func waitForBootstrapComplete(ctx context.Context, config *rest.Config) *cluster
431435 return newAPIError (err )
432436 }
433437
434- return waitForBootstrapConfigMap (ctx , client )
438+ if err := waitForBootstrapConfigMap (ctx , client ); err != nil {
439+ return err
440+ }
441+
442+ if err := waitForStableSNOBootstrap (ctx , config ); err != nil {
443+ return newBootstrapError (err )
444+ }
445+
446+ return nil
435447}
436448
437449// waitForBootstrapConfigMap watches the configmaps in the kube-system namespace
@@ -488,6 +500,64 @@ func waitForBootstrapConfigMap(ctx context.Context, client *kubernetes.Clientset
488500 return nil
489501}
490502
503+ // When bootstrap on SNO deployments, we should not remove the bootstrap node prematurely,
504+ // here we make sure that the deployment is stable.
505+ // Given the nature of single node we just need to make sure things such as etcd are in the proper state
506+ // before continuing.
507+ func waitForStableSNOBootstrap (ctx context.Context , config * rest.Config ) error {
508+ timeout := 5 * time .Minute
509+
510+ // If we're not in a single node deployment, bail early
511+ if isSNO , err := IsSingleNode (); err != nil {
512+ logrus .Warningf ("Can not determine if installing a Single Node cluster, continuing as normal install: %v" , err )
513+ return nil
514+ } else if ! isSNO {
515+ return nil
516+ }
517+
518+ snoBootstrapContext , cancel := context .WithTimeout (ctx , timeout )
519+ defer cancel ()
520+
521+ untilTime := time .Now ().Add (timeout )
522+ timezone , _ := untilTime .Zone ()
523+ logrus .Info ("Detected Single Node deployment" )
524+ logrus .Infof ("Waiting up to %v (until %v %s) for the bootstrap etcd member to be removed..." ,
525+ timeout , untilTime .Format (time .Kitchen ), timezone )
526+
527+ client , err := dynamic .NewForConfig (config )
528+ if err != nil {
529+ return fmt .Errorf ("error creating dynamic client: %w" , err )
530+ }
531+ gvr := schema.GroupVersionResource {
532+ Group : operatorv1 .SchemeGroupVersion .Group ,
533+ Version : operatorv1 .SchemeGroupVersion .Version ,
534+ Resource : "etcds" ,
535+ }
536+ resourceClient := client .Resource (gvr )
537+ // Validate the etcd operator has removed the bootstrap etcd member
538+ return wait .PollUntilContextCancel (snoBootstrapContext , 1 * time .Second , true , func (ctx context.Context ) (done bool , err error ) {
539+ etcdOperator := & operatorv1.Etcd {}
540+ etcdUnstructured , err := resourceClient .Get (ctx , "cluster" , metav1.GetOptions {})
541+ if err != nil {
542+ // There might be service disruptions in SNO, we log those here but keep trying with in the time limit
543+ logrus .Debugf ("Error getting ETCD Cluster resource, retrying: %v" , err )
544+ return false , nil
545+ }
546+ err = runtime .DefaultUnstructuredConverter .FromUnstructured (etcdUnstructured .Object , etcdOperator )
547+ if err != nil {
548+ // This error should not happen, if we do, we log the error and keep retrying until we hit the limit
549+ logrus .Debugf ("Error parsing etcds resource, retrying: %v" , err )
550+ return false , nil
551+ }
552+ for _ , condition := range etcdOperator .Status .Conditions {
553+ if condition .Type == "EtcdBootstrapMemberRemoved" {
554+ return configv1 .ConditionStatus (condition .Status ) == configv1 .ConditionTrue , nil
555+ }
556+ }
557+ return false , nil
558+ })
559+ }
560+
491561// waitForInitializedCluster watches the ClusterVersion waiting for confirmation
492562// that the cluster has been initialized.
493563func waitForInitializedCluster (ctx context.Context , config * rest.Config ) error {
@@ -839,3 +909,25 @@ func handleUnreachableAPIServer(config *rest.Config) error {
839909
840910 return nil
841911}
912+
913+ // IsSingleNode determines if we are in a single node configuration based off of the install config
914+ // loaded from the asset store.
915+ func IsSingleNode () (bool , error ) {
916+ assetStore , err := assetstore .NewStore (command .RootOpts .Dir )
917+ if err != nil {
918+ return false , fmt .Errorf ("error loading asset store: %w" , err )
919+ }
920+ installConfig , err := assetStore .Load (& installconfig.InstallConfig {})
921+ if err != nil {
922+ return false , fmt .Errorf ("error loading installConfig: %w" , err )
923+ }
924+ if installConfig == nil {
925+ return false , fmt .Errorf ("installConfig loaded from asset store was nil" )
926+ }
927+
928+ config := installConfig .(* installconfig.InstallConfig ).Config
929+ if machinePool := config .ControlPlane ; machinePool != nil {
930+ return * machinePool .Replicas == int64 (1 ), nil
931+ }
932+ return false , nil
933+ }
0 commit comments